From 80beb26716f4548387613fca460f6cdad5809dfe Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Sun, 27 Oct 2024 22:33:31 -0500
Subject: [PATCH 01/10] implementation of variable multidimantional binning,
 expanding definition of the Map object to allow also contain variable binning
 histograms

---
 pisa/core/binning.py   | 300 ++++++++++++++++++++++++++++++
 pisa/core/container.py |  19 +-
 pisa/core/map.py       | 412 +++++++++++++++++++++++++++++------------
 pisa/core/pipeline.py  |   4 +-
 4 files changed, 604 insertions(+), 131 deletions(-)

diff --git a/pisa/core/binning.py b/pisa/core/binning.py
index f2849d055..bfc5e2330 100644
--- a/pisa/core/binning.py
+++ b/pisa/core/binning.py
@@ -3042,6 +3042,306 @@ def __len__(self):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+class EventSpecie(object):
+    # pylint: disable=line-too-long
+    r"""
+    Definition of an EventSpecie object. An EventSpecie object has following properties:
+    
+    * name : name of the specie
+    
+    * selection : selection criteria (e.g. cuts, flags)
+    
+    * binning : binning to use for this specie of events
+    
+    """
+    # pylint: enable=line-too-long
+    def __init__(self, binning, name=None, selection=None):
+        self.name = name
+        self.selection = selection # TODO: format?
+        self.binning = binning # sould be MultiDimBinning (maybe OneDim too?)
+        # TODO: add checks for proper format, type
+        self._serializable_state = None
+
+    @property
+    def serializable_state(self):
+        """OrderedDict containing savable state attributes"""
+        if self._serializable_state is None:
+            state = OrderedDict()
+            state['name'] = self.name
+            state['selection'] = self.selection
+            state['binning'] = self.binning.serializable_state
+            self._serializable_state = state
+        # Since the tex property can be modified, must set every time this
+        # property is called
+        return self._serializable_state
+
+    @property
+    def hashable_state(self):
+        """OrderedDict containing savable state attributes"""
+        return self.serializable_state
+
+    def __repr__(self):
+        previous_precision = np.get_printoptions()['precision']
+        np.set_printoptions(precision=18)
+        try:
+            argstrs = [('%s=%r' %item) for item in
+                       self.serializable_state.items()]
+            r = '%s(%s)' %(self.__class__.__name__, ',\n    '.join(argstrs))
+        finally:
+            np.set_printoptions(precision=previous_precision)
+        return r
+
+class VarMultiDimBinning(object):
+    # pylint: disable=line-too-long
+    r"""
+    Multi-dimentional binning with possibility of splitting sample is several event types
+    
+    """
+    # pylint: enable=line-too-long
+
+    _attrs_to_create_new = ('name', 'tex', 'bin_edges', 'is_log', 'is_lin', 'bin_names')
+
+    def __init__(self, event_species, name=None, mask=None):
+
+        if isinstance(event_species, MultiDimBinning):
+            species = [EventSpecie(name='all', selection=None, binning=event_species)]
+        elif isinstance(event_species, Sequence):
+            species = []
+            for obj_num, obj in enumerate(event_species):
+                if isinstance(obj, EventSpecie):
+                    species.append(obj)
+                elif isinstance(obj, MultiDimBinning):
+                    species.append(EventSpecie(name='specie '+str(obj_num), 
+                                               selection=None, binning=obj))
+                else: 
+                    TypeError('Argument/object #%d unhandled type: %s'%(obj_num, type(obj)))
+
+        self._species = tuple(species)
+        self._name = name
+        self._names = None
+        self._binnings = None
+        self._selections = None
+        self._shape = None
+        self._hashable_state = None
+
+        self.mask = None # not implemented, here so Map does not complain
+
+    @property
+    def shape(self):
+        """Variable binning shape. Defined as ..."""
+        if self._shape is None:
+            self._shape = [b.shape for b in self.binnings]
+        return self._shape
+
+    @property
+    def name(self):
+        """Name of the species"""
+        return self._name
+
+    @property
+    def species(self):
+        """list of the species"""
+        return self._species
+
+    @property
+    def names(self):
+        """list of strings : names of each dimension contained"""
+        if self._names is None:
+            self._names = [specie.name for specie in self._species]
+        return self._names
+
+    @property
+    def binnings(self):
+        """list of MultiDimBinning : binnings of each dimension contained"""
+        if self._binnings is None:
+            self._binnings = [specie.binning for specie in self._species]
+        return self._binnings
+
+    @property
+    def selections(self):
+        """list of strings : selections of each dimension contained"""
+        if self._selections is None:
+            self._selections = [specie.selection for specie in self._species]
+        return self._selections
+
+    @property
+    def serializable_state(self):
+
+        d = OrderedDict({'event_species': [d.serializable_state for d in self]})
+        d['name'] = self.name
+        return d
+
+    @property
+    def hashable_state(self):
+        """Everything necessary to fully describe this object's state. Note
+        that objects may be returned by reference, so to prevent external
+        modification, the user must call deepcopy() separately on the returned
+        OrderedDict.
+
+        Returns
+        -------
+        state : OrderedDict that can be passed to instantiate a new
+            VarMultiDimBinning via VarMultiDimBinning(**state)
+
+        """
+        if self._hashable_state is None:
+
+            state = OrderedDict()
+            # TODO: Shouldn't order matter?
+            #state['dimensions'] = [self[name]._hashable_state
+            #                       for name in sorted(self.names)]
+            state['event_species'] = [d.hashable_state for d in self.species]
+            state['name'] = self.name
+
+            self._hashable_state = state
+
+        return self._hashable_state
+
+    def assert_array_fits(self, array):
+        """Check if a 
+
+        """
+        if len(array) != len(self.shape):
+            raise ValueError(
+                'Array length %s does not match variable binning length %s'
+                % (len(array), len(self.shape))
+            )
+        for sp, array_el in zip(self.shape, array):
+            if array_el.shape != sp:
+                raise ValueError(
+                    'Array shape %s does not match binning shape %s'
+                    % (array_el.shape, sp)
+                )
+
+    def __repr__(self):
+        previous_precision = np.get_printoptions()['precision']
+        np.set_printoptions(precision=18)
+        try:
+            argstrs = [('%s=%r' %item) for item in
+                       self.serializable_state.items()]
+            r = '%s(%s)' %(self.__class__.__name__, ',\n    '.join(argstrs))
+        finally:
+            np.set_printoptions(precision=previous_precision)
+        return r
+
+    def __add__(self, other):
+        other = VarMultiDimBinning(other)
+        return VarMultiDimBinning(chain(self, other))
+
+    def __mul__(self, other):
+        if isinstance(other, MultiDimBinning):
+            other = [EventSpecie(name='all', selection=None, binning=other)]
+        other = VarMultiDimBinning(other)
+        return VarMultiDimBinning(chain(self, other))
+
+    def __getitem__(self, index):
+        """Return a new EventSpecie(s), sub-selected by `index`.
+
+        Parameters
+        ----------
+        index : int, slice, str 
+        if str has to be a name of event specie
+
+        Returns
+        -------
+        A new EventSpecie selected by `index`.
+
+        """
+
+        sp_names = self.names
+        mylen = len(sp_names)
+        orig_index = index
+
+        # indexing by name
+        if isinstance(index, str):
+            assert sp_names is not None
+            index = sp_names.index(index)
+            return deepcopy(self._species[index])
+
+        # indexing by int
+        if isinstance(index, int):
+            return deepcopy(self._species[index])
+    
+        # indexing by slice
+        if isinstance(index, slice):
+            index = list(range(*index.indices(mylen)))
+
+        if isinstance(index, Iterable):
+            if not isinstance(index, Sequence):
+                index = list(index)
+            for sp_index in index:
+                if isinstance(sp_index, str):
+                    raise ValueError('Slicing by seq of names currently not'
+                                     ' supported')
+            if not index:
+                raise ValueError('`index` "%s" results in no species being'
+                                 ' specified.' %orig_index)
+            if len(index) > 1 and not np.all(np.diff(index) == 1):
+                raise ValueError('Indices must be monotonically'
+                                 ' increasing and adjacent.')
+            new_species = []
+#             new_names = []
+#             new_selections = []
+#             new_binnings = []
+            for sp_index in index:
+                if sp_index < -mylen or sp_index >= mylen:
+                    raise ValueError(
+                        "Dimension '%s': bin index %s is invalid. Bin index"
+                        " must be >= %+d and <= %+d"
+                        %(self.name, bin_index, -mylen, mylen-1)
+                    )
+                new_species.append(deepcopy(self._species[sp_index]))
+#                 new_names.append(self._names[sp_index])
+#                 new_selections.append(self._selections[sp_index])
+#                 new_binnings.append(self._binnings[sp_index])
+        else:
+            raise TypeError('Unhandled index type %s' %type(orig_index))
+
+        return new_species
+
+        
+#     def __repr__(): # TODO
+
+#     @property
+#     def midpoints(self):
+#         """Return a list of the contained dimensions' midpoints"""
+#         return [d.midpoints for d in self]
+
+#     @property
+#     def weighted_centers(self):
+#         """Return a list of the contained dimensions' weighted_centers (e.g.
+#         equidistant from bin edges on logarithmic scale, if the binning is
+#         logarithmic; otherwise linear). Access `midpoints` attribute for
+#         always-linear alternative."""
+#         return [d.weighted_centers for d in self]
+
+#     @property
+#     def num_bins(self):
+#         """
+#         Return a list of the contained dimensions' num_bins.
+#         Note that this does not accpunt for any bin mask (since it is computed per dimension)
+#         """
+#         return [d.num_bins for d in self]
+
+#     @property
+#     def tot_num_bins(self):
+#         """
+#         Return total number of bins.
+#         If a bin mask is used, this will only count bins that are not masked off
+#         """
+#         if self.mask is None :
+#             return np.product(self.shape)
+#         else :
+#             return np.sum(self.mask.astype(int))
+
+#     @property
+#     def units(self):
+#         """list : Return a list of the contained dimensions' units"""
+#         return [d.units for d in self]
+
+#     def index(self, dim, use_basenames=False):
+
+
 
 def test_OneDimBinning():
     """Unit tests for OneDimBinning class"""
diff --git a/pisa/core/container.py b/pisa/core/container.py
index a98895167..f1802ca84 100644
--- a/pisa/core/container.py
+++ b/pisa/core/container.py
@@ -15,7 +15,7 @@
 import numpy as np
 
 from pisa import FTYPE
-from pisa.core.binning import OneDimBinning, MultiDimBinning
+from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning
 from pisa.core.map import Map, MapSet
 from pisa.core.translation import histogram, lookup, resample
 from pisa.utils.comparisons import ALLCLOSE_KW
@@ -33,7 +33,7 @@ class ContainerSet(object):
 
     containers : list or None
 
-    data_specs : MultiDimBinning, "events" or None
+    data_specs : MultiDimBinning, VarMultiDimBinning, "events" or None
 
     """
     def __init__(self, name, containers=None, data_specs=None):
@@ -67,7 +67,7 @@ def representation(self, representation):
         """
         Parameters
         ----------
-        representation : str, MultiDimBinning or any hashable object
+        representation : str, MultiDimBinning, VarMultiDimBinning or any hashable object
             Data specs should be set to retreive the right representation
             i.e. the representation one is working in at the moment
 
@@ -302,7 +302,7 @@ def representation(self, representation):
         key = hash(representation)
         if not key in self.representation_keys:
             self._representations[key] = representation
-            if isinstance(representation, MultiDimBinning):
+            if isinstance(representation, (MultiDimBinning,VarMultiDimBinning)):
                 for name in representation.names:
                     self.validity[name][key] = True
             elif isinstance(representation, str):
@@ -355,7 +355,7 @@ def all_keys(self):
     @property
     def is_map(self):
         '''Is current representation a map/grid'''
-        return isinstance(self.representation, MultiDimBinning)
+        return isinstance(self.representation, (MultiDimBinning,VarMultiDimBinning))
         
     def mark_changed(self, key):
         '''mark a key as changed and only what is in the current representation is valid'''
@@ -415,7 +415,7 @@ def __add_data(self, key, data):
 
         elif isinstance(data, Sequence) and len(data) == 2:
             binning, array = data
-            assert isinstance(binning, MultiDimBinning)
+            assert isinstance(binning, (MultiDimBinning,VarMultiDimBinning))
             
             assert hash(self.representation) == hash(binning)
                             
@@ -516,8 +516,8 @@ def translate(self, key, src_representation):
             # nothing to do
             return    
     
-        from_map = isinstance(src_representation, MultiDimBinning)
-        to_map = isinstance(dest_representation, MultiDimBinning)
+        from_map = isinstance(src_representation, (MultiDimBinning,VarMultiDimBinning))
+        to_map = isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning))
     
         if self.tranlation_modes[key] == 'average':            
             if from_map and to_map:
@@ -619,9 +619,10 @@ def array_to_binned(self, key, src_representation, dest_representation, averaged
         """
         # TODO: make work for n-dim
         logging.trace('Transforming %s array to binned data'%(key))
+#         print('calling container.array_to_binned() method')
         
         assert src_representation in self.array_representations
-        assert isinstance(dest_representation, MultiDimBinning)
+        assert isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning))
         
         if not dest_representation.is_irregular:
             sample = []
diff --git a/pisa/core/map.py b/pisa/core/map.py
index 55e9f189f..fa7af4089 100755
--- a/pisa/core/map.py
+++ b/pisa/core/map.py
@@ -30,7 +30,7 @@
 from uncertainties import unumpy as unp
 
 from pisa import ureg, HASH_SIGFIGS
-from pisa.core.binning import OneDimBinning, MultiDimBinning
+from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning
 from pisa.utils.comparisons import normQuant, recursiveEquality, ALLCLOSE_KW
 from pisa.utils.flavInt import NuFlavIntGroup
 from pisa.utils.hash import hash_obj
@@ -201,7 +201,7 @@ def new_function(*args, **kwargs):
                 new_state[slot] = state_updates[slot]
             else:
                 new_state[slot] = deepcopy(getattr(self, slot))
-        if len(new_state['binning']) == 0:
+        if not isinstance(new_state['binning'], VarMultiDimBinning) and len(new_state['binning']) == 0:
             return new_state['hist']
         return Map(**new_state)
     return decorate(original_function, new_function)
@@ -308,7 +308,8 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None,
         super().__setattr__('_hash', hash)
         super().__setattr__('_full_comparison', full_comparison)
 
-        if not isinstance(binning, MultiDimBinning):
+        # TODO: add further checks for VarMultiDimBinning case
+        if not isinstance(binning, (MultiDimBinning, VarMultiDimBinning)): 
             if isinstance(binning, Sequence):
                 binning = MultiDimBinning(dimensions=binning)
             elif isinstance(binning, Mapping):
@@ -318,12 +319,22 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None,
                                  ' type %s' %(binning, type(binning)))
         self.parent_indexer = None
 
+#         if isinstance(binning, VarMultiDimBinning):
+#             self.varbinning = True
+#         else:
+#             self.varbinning = False
+
         # Do the work here to set read-only attributes
         super().__setattr__('_binning', binning)
         binning.assert_array_fits(hist)
-        super().__setattr__(
-            '_hist', np.ascontiguousarray(hist)
-        )
+        if not isinstance(binning, VarMultiDimBinning):
+            super().__setattr__(
+                '_hist', np.ascontiguousarray(hist)
+            )
+        else:
+            super().__setattr__(
+                '_hist', [np.ascontiguousarray(hi) for hi in hist]
+            )
         if error_hist is not None:
             self.set_errors(error_hist)
         self._normalize_values = True
@@ -1548,12 +1559,21 @@ def llh(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.llh(actual_values=self.hist,
-                             expected_values=expected_values)
+            if is_not_vb:
+                return stats.llh(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.llh(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.llh(actual_values=self.hist,
-                                expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.llh(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.llh(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def mcllh_mean(self, expected_values, binned=False):
         """Calculate the total LMean log-likelihood value between this map and the
@@ -1574,12 +1594,21 @@ def mcllh_mean(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.mcllh_mean(actual_values=self.hist,
-                             expected_values=expected_values)
+            if is_not_vb:
+                return stats.mcllh_mean(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.mcllh_mean(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.mcllh_mean(actual_values=self.hist,
-                                expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.mcllh_mean(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.mcllh_mean(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
 
     def mcllh_eff(self, expected_values, binned=False):
@@ -1601,12 +1630,21 @@ def mcllh_eff(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.mcllh_eff(actual_values=self.hist,
-                             expected_values=expected_values)
+            if is_not_vb:
+                return stats.mcllh_eff(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.mcllh_eff(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.mcllh_eff(actual_values=self.hist,
-                                expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.mcllh_eff(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.mcllh_eff(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def conv_llh(self, expected_values, binned=False):
         """Calculate the total convoluted log-likelihood value between this map
@@ -1627,12 +1665,21 @@ def conv_llh(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.conv_llh(actual_values=self.hist,
-                                  expected_values=expected_values)
+            if is_not_vb:
+                return stats.conv_llh(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.conv_llh(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.conv_llh(actual_values=self.hist,
-                                     expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.conv_llh(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.conv_llh(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def barlow_llh(self, expected_values, binned=False):
         """Calculate the total barlow log-likelihood value between this map and
@@ -1659,12 +1706,22 @@ def barlow_llh(self, expected_values, binned=False):
         elif isinstance(expected_values, Iterable):
             expected_values = [reduceToHist(x) for x in expected_values]
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.barlow_llh(actual_values=self.hist,
-                                    expected_values=expected_values)
+            if is_not_vb:
+                return stats.barlow_llh(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.barlow_llh(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
+
+        if is_not_vb:
+            return np.sum(stats.barlow_llh(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.barlow_llh(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
-        return np.sum(stats.barlow_llh(actual_values=self.hist,
-                                       expected_values=expected_values))
 
     def mod_chi2(self, expected_values, binned=False):
         """Calculate the total modified chi2 value between this map and the map
@@ -1685,12 +1742,21 @@ def mod_chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.mod_chi2(actual_values=self.hist,
-                                  expected_values=expected_values)
+            if is_not_vb:
+                return stats.mod_chi2(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.mod_chi2(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.mod_chi2(actual_values=self.hist,
-                                     expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.mod_chi2(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.mod_chi2(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def correct_chi2(self, expected_values, binned=False):
         """Calculate the total correct chi2 value between this map and the map
@@ -1711,12 +1777,21 @@ def correct_chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.correct_chi2(actual_values=self.hist,
-                                  expected_values=expected_values)
+            if is_not_vb:
+                return stats.correct_chi2(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.correct_chi2(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.correct_chi2(actual_values=self.hist,
-                                     expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.correct_chi2(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.correct_chi2(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def chi2(self, expected_values, binned=False):
         """Calculate the total chi-squared value between this map and the map
@@ -1737,12 +1812,21 @@ def chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            return stats.chi2(actual_values=self.hist,
-                              expected_values=expected_values)
+            if is_not_vb:
+                return stats.chi2(actual_values=self.hist,
+                                 expected_values=expected_values)
+            else:
+                return [stats.chi2(actual_values=hi,
+                                 expected_values=exp_val_hi)\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        return np.sum(stats.chi2(actual_values=self.hist,
-                                 expected_values=expected_values))
+        if is_not_vb:
+            return np.sum(stats.chi2(actual_values=self.hist,
+                                    expected_values=expected_values))
+        return np.sum([np.sum(stats.chi2(actual_values=hi, expected_values=exp_val_hi))\
+                        for hi, exp_val_hi in zip(self.hist, expected_values)])
 
     def signed_sqrt_mod_chi2(self, expected_values):
         """Calculate the binwise (signed) square-root of the modified chi2 value
@@ -1761,8 +1845,13 @@ def signed_sqrt_mod_chi2(self, expected_values):
         """
         expected_values = reduceToHist(expected_values)
 
-        return stats.signed_sqrt_mod_chi2(actual_values=self.hist,
-                                          expected_values=expected_values)
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
+        if is_not_vb:
+            return stats.chi2(actual_values=self.hist,
+                             expected_values=expected_values)
+        return [stats.chi2(actual_values=hi,
+                         expected_values=exp_val_hi)\
+                for hi, exp_val_hi in zip(self.hist, expected_values)]
 
 
     def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned=False):
@@ -1785,6 +1874,9 @@ def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned=
         llh_per_bin = stats.generalized_poisson_llh(actual_values=self.hist,
                                                     expected_values=expected_values,
                                                     empty_bins=empty_bins)
+        #TODO: implement for VarMultiDimBinning
+        if isinstance(self.binning, VarMultiDimBinning):
+            raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning")
 
         if binned:
             return llh_per_bin
@@ -1869,7 +1961,10 @@ def hist(self):
         # Apply bin mask, if one exists
         # Set masked off elements to NaN (handling both cases where the hst is either a simple array, or has uncertainties)
         if self.binning.mask is not None :
-            hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray?
+            if not isinstance(self.binning, VarMultiDimBinning):
+                hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray?
+            else:
+                raise ValueError("Masking for Maps with Variable binning is not implemented")
 
         # Done
         return hist
@@ -1877,12 +1972,18 @@ def hist(self):
     @property
     def nominal_values(self):
         """numpy.ndarray : Bin values stripped of uncertainties"""
-        return unp.nominal_values(self.hist)
+        if not isinstance(self.binning, VarMultiDimBinning):
+            return unp.nominal_values(self.hist)
+        else:
+            return [unp.nominal_values(hi) for hi in self.hist]
 
     @property
     def std_devs(self):
         """numpy.ndarray : Uncertainties (standard deviations) per bin"""
-        return unp.std_devs(self.hist)
+        if not isinstance(self.binning, VarMultiDimBinning):
+            return unp.std_devs(self.hist)
+        else:
+            return [unp.std_devs(hi) for hi in self.hist]
 
     @property
     def binning(self):
@@ -1903,33 +2004,42 @@ def full_comparison(self, value):
 
     @_new_obj
     def __abs__(self):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "|%s|" % (self.name,),
             #'tex': r"{\left| %s \right|}" % strip_outer_parens(self.tex),
-            'hist': np.abs(self.hist)
+            'hist': np.abs(self.hist) if is_not_vb else \
+            [np.abs(hi) for hi in self.hist]
         }
         return state_updates
 
     @_new_obj
     def __add__(self, other):
         """Add `other` to self"""
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s + %s)" % (self.name, other),
                 #'tex': r"{(%s + %s)}" % (self.tex, other),
-                'hist': self.hist + other
+                'hist': self.hist + other if is_not_vb else \
+                [hi+other for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "(%s + array)" % self.name,
-                #'tex': r"{(%s + X)}" % self.tex,
-                'hist': self.hist + other
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "(%s + array)" % self.name,
+                    #'tex': r"{(%s + X)}" % self.tex,
+                    'hist': self.hist + other
+                }
+            else:
+                raise ValueError("Addition of numpy.ndarray is not supported \
+                for Map with VarMultiDimBinning")
         elif isinstance(other, Map):
             state_updates = {
                 #'name': "(%s + %s)" % (self.name, other.name),
                 #'tex': r"{(%s + %s)}" % (self.tex, other.tex),
-                'hist': self.hist + other.hist,
+                'hist': self.hist + other.hist if is_not_vb else \
+                [hi+other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
             }
@@ -1939,25 +2049,66 @@ def __add__(self, other):
 
     #def __cmp__(self, other):
 
+
+    @_new_obj
+    def __sub__(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
+        if np.isscalar(other) or type(other) is uncertainties.core.Variable:
+            state_updates = {
+                #'name': "(%s - %s)" % (self.name, other),
+                #'tex': "{(%s - %s)}" % (self.tex, other),
+                'hist': self.hist - other if is_not_vb else \
+                [hi-other for hi in self.hist],
+            }
+        elif isinstance(other, np.ndarray):
+            if is_not_vb:
+                state_updates = {
+                    #'name': "(%s - array)" % self.name,
+                    #'tex': "{(%s - X)}" % self.tex,
+                    'hist': self.hist - other,
+                }
+            else:
+                raise ValueError("Substraction of numpy.ndarray is not supported for \
+                Map with VarMultiDimBinning")
+        elif isinstance(other, Map):
+            state_updates = {
+                #'name': "%s - %s" % (self.name, other.name),
+                #'tex': "{(%s - %s)}" % (self.tex, other.tex),
+                'hist': self.hist - other.hist if is_not_vb else \
+                [hi-other_hi for hi, other_hi in zip(self.hist,other.hist)],
+                'full_comparison': (self.full_comparison or
+                                    other.full_comparison),
+            }
+        else:
+            type_error(other)
+        return state_updates
+
     @_new_obj
     def __div__(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s / %s)" % (self.name, other),
                 #'tex': r"{(%s / %s)}" % (self.tex, other),
-                'hist': self.hist / other
+                'hist': self.hist / other if is_not_vb else \
+                [hi/other for hi in self.hist]
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "(%s / array)" % self.name,
-                #'tex': r"{(%s / X)}" % self.tex,
-                'hist': self.hist / other
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "(%s / array)" % self.name,
+                    #'tex': r"{(%s / X)}" % self.tex,
+                    'hist': self.hist / other
+                }
+            else:
+                raise ValueError("Division by numpy.ndarray is not supported \
+                for Map with VarMultiDimBinning")
         elif isinstance(other, Map):
             state_updates = {
                 #'name': "(%s / %s)" % (self.name, other.name),
                 #'tex': r"{(%s / %s)}" % (self.tex, other.tex),
-                'hist': self.hist / other.hist,
+                'hist': self.hist / other.hist if is_not_vb else \
+                [hi/other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
             }
@@ -1968,6 +2119,9 @@ def __div__(self, other):
     def __truediv__(self, other):
         return self.__div__(other)
 
+    def __rtruediv__(self, other):
+        return self.__rdiv__(other)
+
     def __floordiv__(self, other):
         raise NotImplementedError('floordiv not implemented for type Map')
 
@@ -1985,15 +2139,23 @@ def __eq__(self, other):
         Otherwise, simply checks that the hashes are equal.
 
         """
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other):
-            return np.all(self.nominal_values == other)
+            if is_not_vb:
+                return np.all(self.nominal_values == other)
+            else:
+                return np.all([np.all(nv==other) for nv in self.nominal_values])
 
         if type(other) is uncertainties.core.Variable \
                 or isinstance(other, np.ndarray):
-            return (np.all(self.nominal_values
-                           == unp.nominal_values(other))
-                    and np.all(self.std_devs
-                               == unp.std_devs(other)))
+            if is_not_vb:
+                return (np.all(self.nominal_values
+                               == unp.nominal_values(other))
+                        and np.all(self.std_devs
+                                   == unp.std_devs(other)))
+            else:
+                raise ValueError("Comparison numpy.ndarray or uncertainties.core.Variable \
+                is not supported for Map with VarMultiDimBinning")
 
         if isinstance(other, Map):
             if (self.full_comparison or other.full_comparison
@@ -2001,6 +2163,7 @@ def __eq__(self, other):
                 return recursiveEquality(self.hashable_state,
                                          other.hashable_state)
             return self.hash == other.hash
+                
 
         type_error(other)
 
@@ -2013,10 +2176,11 @@ def log(self):
         log_map : Map
 
         """
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "log(%s)" % self.name,
             #'tex': r"\ln\left( %s \right)" % self.tex,
-            'hist': unp.log(self.hist)
+            'hist': unp.log(self.hist) if is_not_vb else [unp.log(hi) for hi in self.hist]
         }
         return state_updates
 
@@ -2029,32 +2193,40 @@ def log10(self):
         log10_map : Map
 
         """
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "log10(%s)" % self.name,
             #'tex': r"\log_{10}\left( %s \right)" % self.tex,
-            'hist': unp.log10(self.hist)
+            'hist': unp.log10(self.hist) if is_not_vb else [unp.log10(hi) for hi in self.hist]
         }
         return state_updates
 
     @_new_obj
     def __mul__(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "%s * %s" % (other, self.name),
                 #'tex': r"%s \cdot %s" % (other, self.tex),
-                'hist': self.hist * other
+                'hist': self.hist * other if is_not_vb else \
+                [hi * other for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "array * %s" % self.name,
-                #'tex': r"X \cdot %s" % self.tex,
-                'hist': self.hist * other,
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "array * %s" % self.name,
+                    #'tex': r"X \cdot %s" % self.tex,
+                    'hist': self.hist * other,
+                }
+            else:
+                raise ValueError("Multiplication by numpy.ndarray is not \
+                supported for Map with VarMultiDimBinning")
         elif isinstance(other, Map):
             state_updates = {
                 #'name': "%s * %s" % (self.name, other.name),
                 #'tex': r"%s \cdot %s" % (self.tex, other.tex),
-                'hist': self.hist * other.hist,
+                'hist': self.hist * other.hist if is_not_vb else \
+                [hi * other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
             }
@@ -2067,33 +2239,41 @@ def __ne__(self, other):
 
     @_new_obj
     def __neg__(self):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "-%s" % self.name,
             #'tex': r"-%s" % self.tex,
-            'hist': -self.hist,
+            'hist': -self.hist if is_not_vb else [-hi for hi in self.hist],
         }
         return state_updates
 
     @_new_obj
     def __pow__(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "%s**%s" % (self.name, other),
                 #'tex': "%s^{%s}" % (self.tex, other),
-                'hist': unp.pow(self.hist, other)
+                'hist': unp.pow(self.hist, other) if is_not_vb else \
+                [unp.pow(hi, other) for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "%s**(array)" % self.name,
-                #'tex': r"%s^{X}" % self.tex,
-                'hist': unp.pow(self.hist, other),
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "%s**(array)" % self.name,
+                    #'tex': r"%s^{X}" % self.tex,
+                    'hist': unp.pow(self.hist, other),
+                }
+            else:
+                raise ValueError("Power operation using numpy.ndarray is \
+                not supported for Map with VarMultiDimBinning")
         elif isinstance(other, Map):
             state_updates = {
                 #'name': "%s**(%s)" % (self.name,
                 #                      strip_outer_parens(other.name)),
                 #'tex': r"%s^{%s}" % (self.tex, strip_outer_parens(other.tex)),
-                'hist': unp.pow(self.hist, other.hist),
+                'hist': unp.pow(self.hist, other.hist) if is_not_vb else \
+                [unp.pow(hi, other_hi) for hi, other_hi in zip(self.hist, other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
             }
@@ -2111,18 +2291,24 @@ def __rdiv__(self, other):
 
     @_new_obj
     def __rdiv(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s / %s)" % (other, self.name),
                 #'tex': "{(%s / %s)}" % (other, self.tex),
-                'hist': other / self.hist,
+                'hist': other / self.hist if is_not_vb else \
+                [other / hi for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "array / %s" % self.name,
-                #'tex': "{(X / %s)}" % self.tex,
-                'hist': other / self.hist,
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "array / %s" % self.name,
+                    #'tex': "{(X / %s)}" % self.tex,
+                    'hist': other / self.hist,
+                    }
+            else:
+                raise ValueError("Division of numpy.ndarray by Map with \
+                VarMultiDimBinning is not supported")
         else:
             type_error(other)
         return state_updates
@@ -2137,18 +2323,24 @@ def __rsub__(self, other):
 
     @_new_obj
     def __rsub(self, other):
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s - %s)" % (other, self.name),
                 #'tex': "{(%s - %s)}" % (other, self.tex),
-                'hist': other - self.hist,
+                'hist': other - self.hist if is_not_vb else \
+                [other - hi for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "(array - %s)" % self.name,
-                #'tex': "{(X - %s)}" % self.tex,
-                'hist': other - self.hist,
-            }
+            if is_not_vb:
+                state_updates = {
+                    #'name': "(array - %s)" % self.name,
+                    #'tex': "{(X - %s)}" % self.tex,
+                    'hist': other - self.hist,
+                }
+            else:
+                raise ValueError("Substarction of numpy.ndarray by Map \
+                with VarMultiDimBinning is not supported")
         else:
             type_error(other)
         return state_updates
@@ -2162,44 +2354,24 @@ def sqrt(self):
         sqrt_map : Map
 
         """
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "sqrt(%s)" % self.name,
             #'tex': r"\sqrt{%s}" % self.tex,
             #'hist': np.asarray(unp.sqrt(self.hist), dtype='float'),
-            'hist': unp.sqrt(self.hist),
+            'hist': unp.sqrt(self.hist) if is_not_vb else [unp.sqrt(hi) for hi in self.hist],
         }
         return state_updates
 
-    @_new_obj
-    def __sub__(self, other):
-        if np.isscalar(other) or type(other) is uncertainties.core.Variable:
-            state_updates = {
-                #'name': "(%s - %s)" % (self.name, other),
-                #'tex': "{(%s - %s)}" % (self.tex, other),
-                'hist': self.hist - other,
-            }
-        elif isinstance(other, np.ndarray):
-            state_updates = {
-                #'name': "(%s - array)" % self.name,
-                #'tex': "{(%s - X)}" % self.tex,
-                'hist': self.hist - other,
-            }
-        elif isinstance(other, Map):
-            state_updates = {
-                #'name': "%s - %s" % (self.name, other.name),
-                #'tex': "{(%s - %s)}" % (self.tex, other.tex),
-                'hist': self.hist - other.hist,
-                'full_comparison': (self.full_comparison or
-                                    other.full_comparison),
-            }
-        else:
-            type_error(other)
-        return state_updates
-
     def allclose(self, other):
         '''Check if this map and another have the same (within machine precision) bin counts'''
         self.assert_compat(other)
-        return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW)
+        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
+        if is_not_vb:
+            return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW)
+        else:
+            return np.all([np.allclose(nv, other_nv, **ALLCLOSE_KW) \
+                           for nv, other_nv in zip(self.nominal_values, other.nominal_values)])
 
 
 # TODO: instantiate individual maps from dicts if passed as such, so user
diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py
index 311e21ee9..abbdd2443 100755
--- a/pisa/core/pipeline.py
+++ b/pisa/core/pipeline.py
@@ -27,7 +27,7 @@
 from pisa.core.param import ParamSet, DerivedParam
 from pisa.core.stage import Stage
 from pisa.core.container import ContainerSet
-from pisa.core.binning import MultiDimBinning
+from pisa.core.binning import MultiDimBinning, VarMultiDimBinning
 from pisa.utils.config_parser import PISAConfigParser, parse_pipeline_config
 from pisa.utils.fileio import mkdir
 from pisa.utils.hash import hash_obj
@@ -326,7 +326,7 @@ def get_outputs(self, output_binning=None, output_key=None):
             output_binning = self.output_binning
             output_key = self.output_key
         else:
-            assert(isinstance(output_binning, MultiDimBinning))
+            assert(isinstance(output_binning, (MultiDimBinning,VarMultiDimBinning)))
 
         assert output_binning is not None
 

From 6ef11c3b9678f5cfa3279ffa4c46c28845e4fc4b Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Wed, 30 Oct 2024 12:44:24 -0500
Subject: [PATCH 02/10] config parser update to include variable binning and
 added more methods to VarMultiDimBinnig

---
 pisa/core/binning.py        | 95 ++++++++++++++++++-------------------
 pisa/utils/config_parser.py | 27 ++++++++++-
 2 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/pisa/core/binning.py b/pisa/core/binning.py
index bfc5e2330..b7fe7f302 100644
--- a/pisa/core/binning.py
+++ b/pisa/core/binning.py
@@ -3123,6 +3123,7 @@ def __init__(self, event_species, name=None, mask=None):
         self._selections = None
         self._shape = None
         self._hashable_state = None
+        self._hash = None
 
         self.mask = None # not implemented, here so Map does not complain
 
@@ -3197,6 +3198,16 @@ def hashable_state(self):
 
         return self._hashable_state
 
+    @property
+    def hash(self):
+        """Unique hash value for this object"""
+        if self._hash is None:
+            self._hash = hash_obj(self.hashable_state)
+        return self._hash
+
+    def __hash__(self):
+        return self.hash
+
     def assert_array_fits(self, array):
         """Check if a 
 
@@ -3213,6 +3224,33 @@ def assert_array_fits(self, array):
                     % (array_el.shape, sp)
                 )
 
+    def assert_compat(self, other):
+        """Check if a (possibly different) binning can map onto the defined
+        binning. Allows for simple re-binning schemes (but no interpolation).
+
+        Parameters
+        ----------
+        other : Binning or container with attribute "binning"
+
+        Returns
+        -------
+        compat : bool
+
+        """
+        if not isinstance(other, VarMultiDimBinning):
+            for val in other.__dict__.values():
+                if isinstance(val, VarMultiDimBinning):
+                    other = val
+                    break
+        assert isinstance(other, VarMultiDimBinning), str(type(other))
+        if other == self:
+            return True
+        for my_sp, other_sp in zip(self, other):
+            if not (my_sp.binning.assert_compat(other_sp.binning) 
+                    and my_sp.selection == other_sp.selection):
+                return False
+        return True
+
     def __repr__(self):
         previous_precision = np.get_printoptions()['precision']
         np.set_printoptions(precision=18)
@@ -3234,6 +3272,14 @@ def __mul__(self, other):
         other = VarMultiDimBinning(other)
         return VarMultiDimBinning(chain(self, other))
 
+    def __eq__(self, other):
+        if not isinstance(other, VarMultiDimBinning):
+            return False
+        return recursiveEquality(self.hashable_state, other.hashable_state)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
     def __getitem__(self, index):
         """Return a new EventSpecie(s), sub-selected by `index`.
 
@@ -3280,9 +3326,6 @@ def __getitem__(self, index):
                 raise ValueError('Indices must be monotonically'
                                  ' increasing and adjacent.')
             new_species = []
-#             new_names = []
-#             new_selections = []
-#             new_binnings = []
             for sp_index in index:
                 if sp_index < -mylen or sp_index >= mylen:
                     raise ValueError(
@@ -3291,57 +3334,11 @@ def __getitem__(self, index):
                         %(self.name, bin_index, -mylen, mylen-1)
                     )
                 new_species.append(deepcopy(self._species[sp_index]))
-#                 new_names.append(self._names[sp_index])
-#                 new_selections.append(self._selections[sp_index])
-#                 new_binnings.append(self._binnings[sp_index])
         else:
             raise TypeError('Unhandled index type %s' %type(orig_index))
 
         return new_species
 
-        
-#     def __repr__(): # TODO
-
-#     @property
-#     def midpoints(self):
-#         """Return a list of the contained dimensions' midpoints"""
-#         return [d.midpoints for d in self]
-
-#     @property
-#     def weighted_centers(self):
-#         """Return a list of the contained dimensions' weighted_centers (e.g.
-#         equidistant from bin edges on logarithmic scale, if the binning is
-#         logarithmic; otherwise linear). Access `midpoints` attribute for
-#         always-linear alternative."""
-#         return [d.weighted_centers for d in self]
-
-#     @property
-#     def num_bins(self):
-#         """
-#         Return a list of the contained dimensions' num_bins.
-#         Note that this does not accpunt for any bin mask (since it is computed per dimension)
-#         """
-#         return [d.num_bins for d in self]
-
-#     @property
-#     def tot_num_bins(self):
-#         """
-#         Return total number of bins.
-#         If a bin mask is used, this will only count bins that are not masked off
-#         """
-#         if self.mask is None :
-#             return np.product(self.shape)
-#         else :
-#             return np.sum(self.mask.astype(int))
-
-#     @property
-#     def units(self):
-#         """list : Return a list of the contained dimensions' units"""
-#         return [d.units for d in self]
-
-#     def index(self, dim, use_basenames=False):
-
-
 
 def test_OneDimBinning():
     """Unit tests for OneDimBinning class"""
diff --git a/pisa/utils/config_parser.py b/pisa/utils/config_parser.py
index a8daa7a73..1ef2b8db8 100644
--- a/pisa/utils/config_parser.py
+++ b/pisa/utils/config_parser.py
@@ -590,7 +590,7 @@ def parse_pipeline_config(config):
 
     """
     # Note: imports placed here to avoid circular imports
-    from pisa.core.binning import MultiDimBinning, OneDimBinning
+    from pisa.core.binning import MultiDimBinning, OneDimBinning, VarMultiDimBinning, EventSpecie
     from pisa.core.param import ParamSelector, DerivedParam
 
     if isinstance(config, str):
@@ -660,7 +660,32 @@ def parse_pipeline_config(config):
                 mask = eval(mask)
             # Create the binning object
             binning_dict[binning] = MultiDimBinning(bins, name=binning, mask=mask)
+        # Now read the variable binning definitions
+        elif name.endswith('.specie_names'):
+            specie_names = split(config.get('binning', name))
+            binning, _ = split(name, sep='.')
 
+            species = []
+            for specie_name in specie_names:
+                try:
+                    sel = config.get('binning', binning + '.' + specie_name + '.selection')
+                    bin_def = config.get('binning', binning + '.' + specie_name + '.binning')
+                except:
+                    logging.error(
+                        "Failed to find definition of '%s'"
+                        " variable binning entry.",
+                        binning
+                    )
+                if not bin_def in binning_dict:
+                    logging.error(
+                        "Failed to find definition of '%s'"
+                        " binning used in '%s' variable binning entry. Variable binnings"
+                        "should be defined after MultiDimBinnings used in their definition.",
+                        binning
+                    )
+                species.append(EventSpecie(name=specie_name, selection=sel, 
+                                           binning=binning_dict[bin_def]))
+            binning_dict[binning] = VarMultiDimBinning(name=binning, event_species=species)
 
     stage_dicts = OrderedDict()
 

From 6b3ddaa4627d1efdc9f4caf737d0620e62b95224 Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Wed, 30 Oct 2024 12:57:27 -0500
Subject: [PATCH 03/10] rest of the Variable binning implementation

---
 pisa/core/container.py    |  83 ++++++++++---
 pisa/core/map.py          |  16 ++-
 pisa/core/pipeline.py     |   4 +-
 pisa/stages/utils/hist.py | 255 ++++++++++++++++++++++++++------------
 4 files changed, 252 insertions(+), 106 deletions(-)

diff --git a/pisa/core/container.py b/pisa/core/container.py
index f1802ca84..24adb575a 100644
--- a/pisa/core/container.py
+++ b/pisa/core/container.py
@@ -302,9 +302,15 @@ def representation(self, representation):
         key = hash(representation)
         if not key in self.representation_keys:
             self._representations[key] = representation
-            if isinstance(representation, (MultiDimBinning,VarMultiDimBinning)):
+            if isinstance(representation, MultiDimBinning):
                 for name in representation.names:
                     self.validity[name][key] = True
+            elif isinstance(representation, VarMultiDimBinning):
+#                 self.representation = 'events'
+                for bn in representation.binnings:
+                    for bn_name in bn.names:
+#                         print(bn_name, key)
+                        self.validity[bn_name][key] = True
             elif isinstance(representation, str):
                 if representation not in self.array_representations:
                     raise ValueError(f"Unknown representation '{representation}'")
@@ -418,22 +424,43 @@ def __add_data(self, key, data):
             assert isinstance(binning, (MultiDimBinning,VarMultiDimBinning))
             
             assert hash(self.representation) == hash(binning)
-                            
-            is_flat = array.shape[0] == binning.size
-            
-            if is_flat:
-                flat_array = array
-            else:
-                # first dimesnions must match
-                assert array.shape[:binning.num_dims] == binning.shape
-                
-                
-                if array.ndim > binning.num_dims:
-                    flat_shape = (binning.size, -1)
+            if isinstance(binning, MultiDimBinning):
+                is_flat = array.shape[0] == binning.size
+
+                if is_flat:
+                    flat_array = array
                 else:
-                    flat_shape = (binning.size, )
-                flat_array = array.reshape(flat_shape)
-            self.current_data[key] = flat_array
+                    # first dimesnions must match
+                    assert array.shape[:binning.num_dims] == binning.shape
+
+
+                    if array.ndim > binning.num_dims:
+                        flat_shape = (binning.size, -1)
+                    else:
+                        flat_shape = (binning.size, )
+                    flat_array = array.reshape(flat_shape)
+                self.current_data[key] = flat_array
+            elif isinstance(binning, VarMultiDimBinning):
+                # TODO: deal with duplicate code
+                arrays = []
+                for arr, binn in zip(array,binning):
+                    arr_binning = binn.binning
+                    is_flat = arr.shape[0] == arr_binning.size
+
+                    if is_flat:
+                        flat_array = arr
+                    else:
+                        # first dimesnions must match
+                        assert arr.shape[:arr_binning.num_dims] == arr_binning.shape
+
+
+                        if array.ndim > arr_binning.num_dims:
+                            flat_shape = (arr_binning.size, -1)
+                        else:
+                            flat_shape = (arr_binning.size, )
+                        flat_array = arr.reshape(flat_shape)
+                    arrays.append(flat_array)
+                self.current_data[key] = arrays
         else:
             raise TypeError('unknown dataformat')
             
@@ -474,11 +501,26 @@ def get_hist(self, key):
         """Return reshaped data as normal n-dimensional histogram"""
         assert self.is_map, 'Cannot retrieve hists from non-map data'
         
-        # retrieve in case needs translation
-        self[key]
+        # retrieve in case needs translation (except VarMultiDimBinning)
+#         print(key, self.keys)
+        if not isinstance(self.representation, VarMultiDimBinning):
+            self[key]
 
         binning = self.representation
         data = self[key]
+        print(binning, data)
+
+        if isinstance(binning, VarMultiDimBinning):
+            assert len(binning.species) == len(data)
+            reshaped_data = []
+            for d, binn in zip(data,binning.binnings):
+                if d.ndim > binn.num_dims:
+                    full_shape = list(binn.shape) + [-1] 
+                else:
+                    full_shape = list(binn.shape)
+                reshaped_data.append(d.reshape(full_shape))
+            return reshaped_data, binning
+
         if data.ndim > binning.num_dims:
             full_shape = list(binning.shape) + [-1] 
         else:
@@ -493,7 +535,10 @@ def get_map(self, key, error=None):
             error_hist = np.abs(self.get_hist(error)[0])
         else:
             error_hist = None
-        assert hist.ndim == binning.num_dims
+        if isinstance(binning, VarMultiDimBinning):
+            assert len(binning.species) == len(hist)
+        else:
+            assert hist.ndim == binning.num_dims
         return Map(name=self.name, hist=hist, error_hist=error_hist, binning=binning)
     
     def __iter__(self):
diff --git a/pisa/core/map.py b/pisa/core/map.py
index fa7af4089..a18934066 100755
--- a/pisa/core/map.py
+++ b/pisa/core/map.py
@@ -500,10 +500,16 @@ def set_errors(self, error_hist):
             )
             return
         self.assert_compat(error_hist)
-        super().__setattr__(
-            '_hist',
-            unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist))
-        )
+        if not isinstance(self.binning, VarMultiDimBinning):
+            super().__setattr__(
+                '_hist',
+                unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist))
+            )
+        else:
+            super().__setattr__(
+                '_hist', [unp.uarray(nv, np.ascontiguousarray(ehi)) \
+                                     for nv, ehi in zip(self.nominal_values,error_hist)]
+            )
 
     # TODO: make this return an OrderedDict to organize all of the returned
     # objects
@@ -1338,7 +1344,7 @@ def from_json(cls, resource):
     def assert_compat(self, other):
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             return
-        elif isinstance(other, np.ndarray):
+        elif isinstance(other, (np.ndarray, list)):
             self.binning.assert_array_fits(other)
         elif isinstance(other, Map):
             self.binning.assert_compat(other.binning)
diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py
index abbdd2443..d68a364a0 100755
--- a/pisa/core/pipeline.py
+++ b/pisa/core/pipeline.py
@@ -330,7 +330,9 @@ def get_outputs(self, output_binning=None, output_key=None):
 
         assert output_binning is not None
 
-        self.data.representation = output_binning
+        # TODO: figure out how this is messing up variable binning!!!
+#         self.data.representation = output_binning
+#         print(self.data.keys)
 
         if isinstance(output_key, tuple):
             assert len(output_key) == 2
diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py
index de8baafb6..bb6aa5585 100644
--- a/pisa/stages/utils/hist.py
+++ b/pisa/stages/utils/hist.py
@@ -7,9 +7,10 @@
 
 from pisa.core.stage import Stage
 from pisa.core.translation import histogram
-from pisa.core.binning import MultiDimBinning, OneDimBinning
+from pisa.core.binning import MultiDimBinning, OneDimBinning, VarMultiDimBinning, EventSpecie
 from pisa.utils.profiler import profile
 from pisa.utils.log import logging
+import re
 
 
 class hist(Stage):  # pylint: disable=invalid-name
@@ -37,17 +38,30 @@ def __init__(
         assert self.calc_mode is not None
         assert self.apply_mode is not None
         self.regularized_apply_mode = None
+        self.variable_binning = None
         self.apply_unc_weights = apply_unc_weights
         self.unweighted = unweighted
 
     def setup_function(self):
 
-        assert isinstance(self.apply_mode, MultiDimBinning), (
+        assert isinstance(self.apply_mode, (MultiDimBinning, VarMultiDimBinning)), (
             "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode
         )
 
+#         for container in self.data:
+#             print('initial', container.keys)
+
+        self.variable_binning = False
+        if isinstance(self.apply_mode, VarMultiDimBinning):
+            self.variable_binning = True
+
         if isinstance(self.calc_mode, MultiDimBinning):
 
+            if self.variable_binning:
+                raise NotImplementedError(
+                    "MultiDimBinning to VarMultiDimBinning conversion is not implemented!"
+                )
+
             # The two binning must be exclusive
             assert len(set(self.calc_mode.names) & set(self.apply_mode.names)) == 0
 
@@ -63,54 +77,108 @@ def setup_function(self):
                 self.data.representation = self.calc_mode
                 container["hist_transform"] = transform
 
+        elif isinstance(self.calc_mode, VarMultiDimBinning):
+            raise NotImplementedError(
+                "Using VarMultiDimBinning as calc_mode is not implemented!"
+            )
+#             assert len(set(self.calc_mode.names) & set(self.apply_mode.names)) == 0
+#             for calc_mode_sp, apply_mode_sp in zip(self.calc_mode, self.apply_mode):
+#                 assert len(set(calc_mode_sp.binning.names) & set(apply_mode_sp.binning.names)) == 0
+            # TODO
+
         elif self.calc_mode == "events":
             # For dimensions where the binning is irregular, we pre-compute the
             # index that each sample falls into and then bin regularly in the index.
             # For dimensions that are logarithmic, we add a linear binning in
             # the logarithm.
-            dimensions = []
-            for dim in self.apply_mode:
-                if dim.is_irregular:
-                    # create a new axis with digitized variable
-                    varname = dim.name + "__" + self.apply_mode.name + "_idx"
-                    new_dim = OneDimBinning(
-                        varname, domain=[0, dim.num_bins], num_bins=dim.num_bins
-                    )
-                    dimensions.append(new_dim)
-                    for container in self.data:
-                        container.representation = "events"
-                        x = container[dim.name] * dim.units
-                        # Compute the bin index each sample would fall into, and
-                        # shift by -1 such that samples below the binning range
-                        # get assigned the index -1.
-                        x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1
-                        # To be consistent with numpy histogramming, we need to
-                        # shift those values that are exactly at the uppermost edge
-                        # down one index such that they are included in the highest
-                        # bin instead of being treated as an outlier.
-                        on_edge = x == dim.bin_edges[-1]
-                        x_idx[on_edge] -= 1
-                        container[varname] = x_idx
-                elif dim.is_log:
-                    # We don't compute the log of the variable just yet, this
-                    # will be done later during `apply_function` using the
-                    # representation mechanism.
-                    new_dim = OneDimBinning(
-                        dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins
-                    )
-                    dimensions.append(new_dim)
-                else:
-                    dimensions.append(dim)
-            self.regularized_apply_mode = MultiDimBinning(dimensions)
-            logging.debug(
-                "Using regularized binning:\n" + str(self.regularized_apply_mode)
-            )
+            if not self.variable_binning:
+                dimensions = []
+                for dim in self.apply_mode:
+                    if dim.is_irregular:
+                        # create a new axis with digitized variable
+                        varname = dim.name + "__" + self.apply_mode.name + "_idx"
+                        new_dim = OneDimBinning(
+                            varname, domain=[0, dim.num_bins], num_bins=dim.num_bins
+                        )
+                        dimensions.append(new_dim)
+                        for container in self.data:
+                            container.representation = "events"
+                            x = container[dim.name] * dim.units
+                            # Compute the bin index each sample would fall into, and
+                            # shift by -1 such that samples below the binning range
+                            # get assigned the index -1.
+                            x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1
+                            # To be consistent with numpy histogramming, we need to
+                            # shift those values that are exactly at the uppermost edge
+                            # down one index such that they are included in the highest
+                            # bin instead of being treated as an outlier.
+                            on_edge = x == dim.bin_edges[-1]
+                            x_idx[on_edge] -= 1
+                            container[varname] = x_idx
+                    elif dim.is_log:
+                        # We don't compute the log of the variable just yet, this
+                        # will be done later during `apply_function` using the
+                        # representation mechanism.
+                        new_dim = OneDimBinning(
+                            dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins
+                        )
+                        dimensions.append(new_dim)
+                    else:
+                        dimensions.append(dim)
+                # simplify the code by making it a VarMultiDimBinning ...
+                # self.regularized_apply_mode = MultiDimBinning(dimensions)
+                self.regularized_apply_mode = VarMultiDimBinning([EventSpecie(
+                    binning=MultiDimBinning(dimensions))])
+                logging.debug(
+                    "Using regularized binning:\n" + str(self.regularized_apply_mode)
+                )
+            else:
+                # TODO: deal with repeat code
+                print('calculating using var binning!!!')
+                species = []
+                for specie in self.apply_mode.species:
+                    dimensions = []
+                    for dim in specie.binning:
+                        if dim.is_irregular:
+                            varname = dim.name + "__" + specie.name + "_" + "_idx"
+                            print(varname)
+                            new_dim = OneDimBinning(
+                                varname, domain=[0, dim.num_bins], num_bins=dim.num_bins
+                            )
+                            dimensions.append(new_dim)
+                            for container in self.data:
+                                container.representation = "events"
+                                x = container[dim.name] * dim.units
+                                x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1
+                                on_edge = x == dim.bin_edges[-1]
+                                x_idx[on_edge] -= 1
+                                container[varname] = x_idx
+                        elif dim.is_log:
+                            new_dim = OneDimBinning(
+                                dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins
+                            )
+                            dimensions.append(new_dim)
+                        else:
+                            dimensions.append(dim)
+                    species.append(EventSpecie(name=specie.name, selection=specie.selection,
+                                              binning=MultiDimBinning(dimensions)))
+                self.regularized_apply_mode = VarMultiDimBinning(species)
+                logging.debug(
+                    "Using regularized binning:\n" + str(self.regularized_apply_mode)
+                )
+            
         else:
             raise ValueError(f"unknown calc mode: {self.calc_mode}")
 
+#         for container in self.data:
+#             print('setup_final', container.keys)
+
     @profile
     def apply_function(self):
 
+#         for container in self.data:
+#             print('apply_initial', container.keys)
+
         if isinstance(self.calc_mode, MultiDimBinning):
 
             if self.unweighted:
@@ -144,55 +212,80 @@ def apply_function(self):
 
         elif self.calc_mode == "events":
             for container in self.data:
+#                 print(container.keys)
                 container.representation = self.calc_mode
-                sample = []
-                dims_log = [d.is_log for d in self.apply_mode]
-                dims_ire = [d.is_irregular for d in self.apply_mode]
-                for dim, is_log, is_ire in zip(
-                    self.regularized_apply_mode, dims_log, dims_ire
-                ):
-                    if is_log and not is_ire:
-                        container.representation = "log_events"
-                        sample.append(container[dim.name])
-                    else:
-                        container.representation = "events"
-                        sample.append(container[dim.name])
-
-                if self.unweighted:
-                    if "astro_weights" in container.keys:
-                        weights = np.ones_like(
-                            container["weights"] + container["astro_weights"]
+                print(container.keys)
+                hists = []
+                errors = []
+                bin_unc2s = []
+                for specie, reg_specie in zip(self.apply_mode.species,
+                                              self.regularized_apply_mode):
+                    selection = specie.selection
+                    if selection is None:
+                        sel_mask = np.ones_like(container["weights"])
+                    for var_name in container.keys: #TODO add check that masks don't overlap
+                        # using word boundary \b to replace whole words only
+                        print(type(var_name), var_name)
+                        selection = re.sub(
+                            r'\b{}\b'.format(var_name),
+                            'container["%s"]' % (var_name),
+                            selection
                         )
+                    print(selection)
+                    sel_mask = eval(selection)  # pylint: disable=eval-used
+                    
+                    sample = []
+                    dims_log = [d.is_log for d in specie.binning]
+                    dims_ire = [d.is_irregular for d in specie.binning]
+                    for dim, is_log, is_ire in zip(
+                        reg_specie.binning, dims_log, dims_ire
+                    ):
+                        
+                        if is_log and not is_ire:
+                            container.representation = "log_events"
+                            sample.append(container[dim.name][sel_mask])
+                        else:
+                            container.representation = "events"
+                            sample.append(container[dim.name][sel_mask])
+
+                    if self.unweighted:
+                        if "astro_weights" in container.keys:
+                            weights = np.ones_like(
+                                container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                            )
+                        else:
+                            weights = np.ones_like(container["weights"][sel_mask])
                     else:
-                        weights = np.ones_like(container["weights"])
-                else:
-                    if "astro_weights" in container.keys:
-                        weights = container["weights"] + container["astro_weights"]
+                        if "astro_weights" in container.keys:
+                            weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                        else:
+                            weights = container["weights"][sel_mask]
+                    if self.apply_unc_weights:
+                        unc_weights = container["unc_weights"][sel_mask]
                     else:
-                        weights = container["weights"]
-                if self.apply_unc_weights:
-                    unc_weights = container["unc_weights"]
-                else:
-                    unc_weights = np.ones(weights.shape)
+                        unc_weights = np.ones(weights.shape)
 
-                # The hist is now computed using a binning that is completely linear
-                # and regular
-                hist = histogram(
-                    sample,
-                    unc_weights*weights,
-                    self.regularized_apply_mode,
-                    averaged=False
-                )
+                    # The hist is now computed using a binning that is completely linear
+                    # and regular
+                    hist = histogram(
+                        sample,
+                        (unc_weights*weights),
+                        MultiDimBinning(reg_specie.binning),
+                        averaged=False
+                    )
+#                     print(hist)
+                    hists.append(hist)
 
-                if self.error_method == "sumw2":
-                    sumw2 = histogram(sample, np.square(unc_weights*weights),
-                        self.regularized_apply_mode, averaged=False)
-                    bin_unc2 = histogram(sample, np.square(unc_weights)*weights,
-                        self.regularized_apply_mode, averaged=False)
+                    if self.error_method == "sumw2":
+                        errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights),
+                            reg_specie.binning, averaged=False)) )
+                        bin_unc2s.append(histogram(sample, np.square(unc_weights)*weights,
+                            reg_specie.binning, averaged=False))
 
                 container.representation = self.apply_mode
-                container["weights"] = hist
+                container["weights"] = self.apply_mode, hists
+                print(container)
 
                 if self.error_method == "sumw2":
-                    container["errors"] = np.sqrt(sumw2)
-                    container["bin_unc2"] = bin_unc2
+                    container["errors"] = self.apply_mode, errors
+                    container["bin_unc2"] = self.apply_mode, bin_unc2s

From a5984b5c515c3523a365909dd771b0ce23a44927 Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Sun, 3 Nov 2024 15:58:30 -0600
Subject: [PATCH 04/10] add proper tests for variable binning and EventSpecie;
 also update __eq__ and __repr__ methods

---
 pisa/core/binning.py | 195 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 179 insertions(+), 16 deletions(-)

diff --git a/pisa/core/binning.py b/pisa/core/binning.py
index b7fe7f302..20f10e554 100644
--- a/pisa/core/binning.py
+++ b/pisa/core/binning.py
@@ -3056,12 +3056,36 @@ class EventSpecie(object):
     """
     # pylint: enable=line-too-long
     def __init__(self, binning, name=None, selection=None):
-        self.name = name
-        self.selection = selection # TODO: format?
-        self.binning = binning # sould be MultiDimBinning (maybe OneDim too?)
-        # TODO: add checks for proper format, type
+
+        if not isinstance(binning, MultiDimBinning):
+            try:
+                binning = MultiDimBinning(binning)
+            except:
+                TypeError('EventSpecie accepts MultiDimBinning as \
+                binning. Unable to initialize MultiDimBinning using \
+                provided binning: %s'%(binning))
+
+        self._name = name
+        self._selection = selection # TODO: add check for selection format?
+        self._binning = binning
+
         self._serializable_state = None
 
+    @property
+    def name(self):
+        """Name of the specie"""
+        return self._name
+
+    @property
+    def selection(self):
+        """Specie selection string"""
+        return self._selection
+
+    @property
+    def binning(self):
+        """Specie binning"""
+        return self._binning
+
     @property
     def serializable_state(self):
         """OrderedDict containing savable state attributes"""
@@ -3069,10 +3093,8 @@ def serializable_state(self):
             state = OrderedDict()
             state['name'] = self.name
             state['selection'] = self.selection
-            state['binning'] = self.binning.serializable_state
+            state['binning'] = 'MultiDimBinning(%s)'%self.binning.serializable_state
             self._serializable_state = state
-        # Since the tex property can be modified, must set every time this
-        # property is called
         return self._serializable_state
 
     @property
@@ -3084,13 +3106,23 @@ def __repr__(self):
         previous_precision = np.get_printoptions()['precision']
         np.set_printoptions(precision=18)
         try:
-            argstrs = [('%s=%r' %item) for item in
-                       self.serializable_state.items()]
+            argstrs = [('%s=%r'%(k,self.serializable_state[k])) for k in
+                       ('name','selection')]
+            argstrs.append('%s=%s'%('binning',repr(self.binning)))
             r = '%s(%s)' %(self.__class__.__name__, ',\n    '.join(argstrs))
         finally:
             np.set_printoptions(precision=previous_precision)
         return r
 
+    def __eq__(self, other):
+        if not isinstance(other, EventSpecie):
+            return False
+        if (self.name == other.name 
+            and self.selection == other.selection 
+            and self.binning == other.binning):
+            return True
+        return False
+
 class VarMultiDimBinning(object):
     # pylint: disable=line-too-long
     r"""
@@ -3136,7 +3168,7 @@ def shape(self):
 
     @property
     def name(self):
-        """Name of the species"""
+        """Name of the VarMultiDimBinning object"""
         return self._name
 
     @property
@@ -3188,9 +3220,6 @@ def hashable_state(self):
         if self._hashable_state is None:
 
             state = OrderedDict()
-            # TODO: Shouldn't order matter?
-            #state['dimensions'] = [self[name]._hashable_state
-            #                       for name in sorted(self.names)]
             state['event_species'] = [d.hashable_state for d in self.species]
             state['name'] = self.name
 
@@ -3255,8 +3284,9 @@ def __repr__(self):
         previous_precision = np.get_printoptions()['precision']
         np.set_printoptions(precision=18)
         try:
-            argstrs = [('%s=%r' %item) for item in
-                       self.serializable_state.items()]
+            species_argstrs = ['%r'%sp for sp in self.species]
+            argstrs = [('event_species=[%s]' %',\n    '.join(species_argstrs))]
+            argstrs.append('name=%s' %self.name)
             r = '%s(%s)' %(self.__class__.__name__, ',\n    '.join(argstrs))
         finally:
             np.set_printoptions(precision=previous_precision)
@@ -3275,7 +3305,35 @@ def __mul__(self, other):
     def __eq__(self, other):
         if not isinstance(other, VarMultiDimBinning):
             return False
-        return recursiveEquality(self.hashable_state, other.hashable_state)
+        # only comparing length of specie arrays since order can be different
+        if not len(self.shape)==len(other.shape):
+            return False
+
+        # sort by selection in case names are not unique
+        comp_order_self = np.argsort(self.selections)
+        comp_order_other = np.argsort(other.selections)
+        for i,j in zip(comp_order_self, comp_order_other):
+            if self.species[i] != other.species[j]:
+                return False
+        # throw warning if species are the same but in different order
+        if np.any(comp_order_self != comp_order_other):
+            logging.warning(
+                'Following VarMultiDimBinning objects are not equal because they '
+                'have different order of `event_species`:\n\n %r \n\nand\n\n %r',
+                self, other
+            )
+            return False
+        # throw info in case binnings only differ by name 
+        if self.name != other.name:
+            logging.info(
+                'Following VarMultiDimBinning objects are not equal '
+                'only because of different names.'
+                'Both binnings contain identical `event_species`:\n\n %r \n\nand\n\n %r',
+                self, other
+            )
+            return False
+            
+        return True
 
     def __ne__(self, other):
         return not self.__eq__(other)
@@ -3795,8 +3853,113 @@ def test_MultiDimBinning():
     
     logging.info('<< PASS : test_MultiDimBinning >>')
 
+def test_EventSpecie():
+
+    # needed so that eval(repr(ev_sp)) works
+    from numpy import array, float32, float64 # pylint: disable=unused-variable
+
+    binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=8, 
+                                       domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True, num_bins=10, 
+                                       domain=[-1, 0.])
+                                ])
+    binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=12, 
+                                       domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True, num_bins=5, 
+                                       domain=[-1, -0.2]),
+                                  dict(name='pid', is_lin=True, bin_edges=[0.0, 0.25, 0.55, 1.0], 
+                                       bin_names=['cascades', 'mixed','tracks'])
+                                ])
+
+    ev_sp1 = EventSpecie(binning=binning_2d, name='sp1_2d', selection='reco_rho36>200')
+    ev_sp2 = EventSpecie(binning=binning_3d, name='sp2_3d', selection='reco_rho36<=200')
+    ev_sp3 = EventSpecie(binning=binning_2d, name='sp1_2d', selection='reco_rho36<=200')
+
+    name1 = ev_sp1.name
+    name2 = ev_sp2.name
+    name3 = ev_sp3.name
+    selection1 = ev_sp1.selection
+    selection2 = ev_sp2.selection
+    selection3 = ev_sp3.selection
+    binning1 = ev_sp1.binning
+    binning2 = ev_sp2.binning
+    binning3 = ev_sp3.binning
+
+    assert name1 != name2
+    assert name1 == name3
+    assert selection1 != selection2
+    assert selection2 == selection3
+    assert binning1 != binning2
+    assert binning1 == binning3
+
+    _ = binning1.hashable_state
+
+    assert eval(repr(ev_sp1)) == ev_sp1 # pylint: disable=eval-used
+
+    # TODO: saving to file
+
+    logging.info('<< PASS : test_EventSpecie >>')
+
+def test_VarMultiDimBinning():
+
+    # needed so that eval(repr(vb)) works
+    from numpy import array, float32, float64 # pylint: disable=unused-variable
+
+    binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=8, 
+                                       domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True, num_bins=10, 
+                                       domain=[-1, 0.])
+                                ])
+    binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=12, 
+                                       domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True, num_bins=5, 
+                                       domain=[-1, -0.2]),
+                                  dict(name='pid', is_lin=True, bin_edges=[0.0, 0.25, 0.55, 1.0], 
+                                       bin_names=['cascades', 'mixed','tracks'])
+                                ])
+
+    vb = VarMultiDimBinning([EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d),
+                             EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d),
+                            ])
+    vb2 = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d),
+                             EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d),
+                            ])
+    vb2a = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d),
+                             EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d),
+                            ],
+                             name='2a')
+    vb3 = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d),
+                             EventSpecie(name='mid_ndom', selection='(n_hit_doms>20)&(n_hit_doms<=30)', 
+                                         binning=binning_3d),
+                             EventSpecie(name='high_ndom', selection='n_hit_doms>30', binning=binning_3d),
+                            ])
+    
+
+    assert eval(repr(vb)) == vb # pylint: disable=eval-used
+
+    assert vb != vb2
+    assert vb2 != vb2a
+    assert vb2 != vb3
+    assert vb2.assert_compat(vb2a)
+    assert vb[0] != vb[1]
+
+    _ = vb.shape
+    _ = vb.name
+    _ = vb.species
+    _ = vb.names
+    _ = vb.binnings
+    _ = vb.selections
+    _ = vb.serializable_state
+    _ = vb.hashable_state
+    _ = hash(vb)
+
+    # TODO: saving to file
+
+    logging.info('<< PASS : test_VarMultiDimBinning >>')
 
 if __name__ == "__main__":
     set_verbosity(1)
     test_OneDimBinning()
     test_MultiDimBinning()
+    test_EventSpecie()
+    test_VarMultiDimBinning()

From 246254d8d6fa902932b43cf39f64ec01923c43c7 Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Sun, 3 Nov 2024 16:59:29 -0600
Subject: [PATCH 05/10] add a flag to Map for using variable binning

---
 pisa/core/map.py | 125 ++++++++++++++++++++---------------------------
 1 file changed, 53 insertions(+), 72 deletions(-)

diff --git a/pisa/core/map.py b/pisa/core/map.py
index a18934066..fd8b3395e 100755
--- a/pisa/core/map.py
+++ b/pisa/core/map.py
@@ -296,7 +296,8 @@ class Map(object):
 
     """
     _slots = ('name', 'hist', 'binning', 'hash', '_hash', 'tex',
-              'full_comparison', 'parent_indexer', '_normalize_values')
+              'full_comparison', 'parent_indexer', '_normalize_values',
+              'variable_binning')
     _state_attrs = ('name', 'hist', 'binning', 'hash', 'tex',
                     'full_comparison')
 
@@ -319,15 +320,15 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None,
                                  ' type %s' %(binning, type(binning)))
         self.parent_indexer = None
 
-#         if isinstance(binning, VarMultiDimBinning):
-#             self.varbinning = True
-#         else:
-#             self.varbinning = False
+        if isinstance(binning, VarMultiDimBinning):
+            self.variable_binning = True
+        else:
+            self.variable_binning = False
 
         # Do the work here to set read-only attributes
         super().__setattr__('_binning', binning)
         binning.assert_array_fits(hist)
-        if not isinstance(binning, VarMultiDimBinning):
+        if not self.variable_binning:
             super().__setattr__(
                 '_hist', np.ascontiguousarray(hist)
             )
@@ -500,7 +501,7 @@ def set_errors(self, error_hist):
             )
             return
         self.assert_compat(error_hist)
-        if not isinstance(self.binning, VarMultiDimBinning):
+        if not self.variable_binning:
             super().__setattr__(
                 '_hist',
                 unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist))
@@ -1565,9 +1566,8 @@ def llh(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.llh(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1600,9 +1600,8 @@ def mcllh_mean(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.mcllh_mean(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1636,9 +1635,8 @@ def mcllh_eff(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.mcllh_eff(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1671,9 +1669,8 @@ def conv_llh(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.conv_llh(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1712,9 +1709,8 @@ def barlow_llh(self, expected_values, binned=False):
         elif isinstance(expected_values, Iterable):
             expected_values = [reduceToHist(x) for x in expected_values]
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.barlow_llh(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1748,9 +1744,8 @@ def mod_chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.mod_chi2(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1783,9 +1778,8 @@ def correct_chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.correct_chi2(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1818,9 +1812,8 @@ def chi2(self, expected_values, binned=False):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if binned:
-            if is_not_vb:
+            if not self.variable_binning:
                 return stats.chi2(actual_values=self.hist,
                                  expected_values=expected_values)
             else:
@@ -1851,8 +1844,7 @@ def signed_sqrt_mod_chi2(self, expected_values):
         """
         expected_values = reduceToHist(expected_values)
 
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
-        if is_not_vb:
+        if not self.variable_binning:
             return stats.chi2(actual_values=self.hist,
                              expected_values=expected_values)
         return [stats.chi2(actual_values=hi,
@@ -1877,12 +1869,13 @@ def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned=
 
         '''
 
+        #TODO: implement for VarMultiDimBinning
+        if self.variable_binning:
+            raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning")
+
         llh_per_bin = stats.generalized_poisson_llh(actual_values=self.hist,
                                                     expected_values=expected_values,
                                                     empty_bins=empty_bins)
-        #TODO: implement for VarMultiDimBinning
-        if isinstance(self.binning, VarMultiDimBinning):
-            raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning")
 
         if binned:
             return llh_per_bin
@@ -1967,7 +1960,7 @@ def hist(self):
         # Apply bin mask, if one exists
         # Set masked off elements to NaN (handling both cases where the hst is either a simple array, or has uncertainties)
         if self.binning.mask is not None :
-            if not isinstance(self.binning, VarMultiDimBinning):
+            if not self.variable_binning:
                 hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray?
             else:
                 raise ValueError("Masking for Maps with Variable binning is not implemented")
@@ -1978,7 +1971,7 @@ def hist(self):
     @property
     def nominal_values(self):
         """numpy.ndarray : Bin values stripped of uncertainties"""
-        if not isinstance(self.binning, VarMultiDimBinning):
+        if not self.variable_binning:
             return unp.nominal_values(self.hist)
         else:
             return [unp.nominal_values(hi) for hi in self.hist]
@@ -1986,7 +1979,7 @@ def nominal_values(self):
     @property
     def std_devs(self):
         """numpy.ndarray : Uncertainties (standard deviations) per bin"""
-        if not isinstance(self.binning, VarMultiDimBinning):
+        if not self.variable_binning:
             return unp.std_devs(self.hist)
         else:
             return [unp.std_devs(hi) for hi in self.hist]
@@ -2010,11 +2003,10 @@ def full_comparison(self, value):
 
     @_new_obj
     def __abs__(self):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "|%s|" % (self.name,),
             #'tex': r"{\left| %s \right|}" % strip_outer_parens(self.tex),
-            'hist': np.abs(self.hist) if is_not_vb else \
+            'hist': np.abs(self.hist) if not self.variable_binning else \
             [np.abs(hi) for hi in self.hist]
         }
         return state_updates
@@ -2022,16 +2014,15 @@ def __abs__(self):
     @_new_obj
     def __add__(self, other):
         """Add `other` to self"""
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s + %s)" % (self.name, other),
                 #'tex': r"{(%s + %s)}" % (self.tex, other),
-                'hist': self.hist + other if is_not_vb else \
+                'hist': self.hist + other if not self.variable_binning else \
                 [hi+other for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "(%s + array)" % self.name,
                     #'tex': r"{(%s + X)}" % self.tex,
@@ -2044,7 +2035,7 @@ def __add__(self, other):
             state_updates = {
                 #'name': "(%s + %s)" % (self.name, other.name),
                 #'tex': r"{(%s + %s)}" % (self.tex, other.tex),
-                'hist': self.hist + other.hist if is_not_vb else \
+                'hist': self.hist + other.hist if not self.variable_binning else \
                 [hi+other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
@@ -2058,16 +2049,15 @@ def __add__(self, other):
 
     @_new_obj
     def __sub__(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s - %s)" % (self.name, other),
                 #'tex': "{(%s - %s)}" % (self.tex, other),
-                'hist': self.hist - other if is_not_vb else \
+                'hist': self.hist - other if not self.variable_binning else \
                 [hi-other for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "(%s - array)" % self.name,
                     #'tex': "{(%s - X)}" % self.tex,
@@ -2080,7 +2070,7 @@ def __sub__(self, other):
             state_updates = {
                 #'name': "%s - %s" % (self.name, other.name),
                 #'tex': "{(%s - %s)}" % (self.tex, other.tex),
-                'hist': self.hist - other.hist if is_not_vb else \
+                'hist': self.hist - other.hist if not self.variable_binning else \
                 [hi-other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
@@ -2091,16 +2081,15 @@ def __sub__(self, other):
 
     @_new_obj
     def __div__(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s / %s)" % (self.name, other),
                 #'tex': r"{(%s / %s)}" % (self.tex, other),
-                'hist': self.hist / other if is_not_vb else \
+                'hist': self.hist / other if not self.variable_binning else \
                 [hi/other for hi in self.hist]
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "(%s / array)" % self.name,
                     #'tex': r"{(%s / X)}" % self.tex,
@@ -2113,7 +2102,7 @@ def __div__(self, other):
             state_updates = {
                 #'name': "(%s / %s)" % (self.name, other.name),
                 #'tex': r"{(%s / %s)}" % (self.tex, other.tex),
-                'hist': self.hist / other.hist if is_not_vb else \
+                'hist': self.hist / other.hist if not self.variable_binning else \
                 [hi/other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
@@ -2145,16 +2134,15 @@ def __eq__(self, other):
         Otherwise, simply checks that the hashes are equal.
 
         """
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other):
-            if is_not_vb:
+            if not self.variable_binning:
                 return np.all(self.nominal_values == other)
             else:
                 return np.all([np.all(nv==other) for nv in self.nominal_values])
 
         if type(other) is uncertainties.core.Variable \
                 or isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 return (np.all(self.nominal_values
                                == unp.nominal_values(other))
                         and np.all(self.std_devs
@@ -2182,11 +2170,10 @@ def log(self):
         log_map : Map
 
         """
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "log(%s)" % self.name,
             #'tex': r"\ln\left( %s \right)" % self.tex,
-            'hist': unp.log(self.hist) if is_not_vb else [unp.log(hi) for hi in self.hist]
+            'hist': unp.log(self.hist) if not self.variable_binning else [unp.log(hi) for hi in self.hist]
         }
         return state_updates
 
@@ -2199,26 +2186,25 @@ def log10(self):
         log10_map : Map
 
         """
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "log10(%s)" % self.name,
             #'tex': r"\log_{10}\left( %s \right)" % self.tex,
-            'hist': unp.log10(self.hist) if is_not_vb else [unp.log10(hi) for hi in self.hist]
+            'hist': unp.log10(self.hist) if not self.variable_binning else \
+            [unp.log10(hi) for hi in self.hist]
         }
         return state_updates
 
     @_new_obj
     def __mul__(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "%s * %s" % (other, self.name),
                 #'tex': r"%s \cdot %s" % (other, self.tex),
-                'hist': self.hist * other if is_not_vb else \
+                'hist': self.hist * other if not self.variable_binning else \
                 [hi * other for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "array * %s" % self.name,
                     #'tex': r"X \cdot %s" % self.tex,
@@ -2231,7 +2217,7 @@ def __mul__(self, other):
             state_updates = {
                 #'name': "%s * %s" % (self.name, other.name),
                 #'tex': r"%s \cdot %s" % (self.tex, other.tex),
-                'hist': self.hist * other.hist if is_not_vb else \
+                'hist': self.hist * other.hist if not self.variable_binning else \
                 [hi * other_hi for hi, other_hi in zip(self.hist,other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
@@ -2245,26 +2231,24 @@ def __ne__(self, other):
 
     @_new_obj
     def __neg__(self):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "-%s" % self.name,
             #'tex': r"-%s" % self.tex,
-            'hist': -self.hist if is_not_vb else [-hi for hi in self.hist],
+            'hist': -self.hist if not self.variable_binning else [-hi for hi in self.hist],
         }
         return state_updates
 
     @_new_obj
     def __pow__(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "%s**%s" % (self.name, other),
                 #'tex': "%s^{%s}" % (self.tex, other),
-                'hist': unp.pow(self.hist, other) if is_not_vb else \
+                'hist': unp.pow(self.hist, other) if not self.variable_binning else \
                 [unp.pow(hi, other) for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "%s**(array)" % self.name,
                     #'tex': r"%s^{X}" % self.tex,
@@ -2278,7 +2262,7 @@ def __pow__(self, other):
                 #'name': "%s**(%s)" % (self.name,
                 #                      strip_outer_parens(other.name)),
                 #'tex': r"%s^{%s}" % (self.tex, strip_outer_parens(other.tex)),
-                'hist': unp.pow(self.hist, other.hist) if is_not_vb else \
+                'hist': unp.pow(self.hist, other.hist) if not self.variable_binning else \
                 [unp.pow(hi, other_hi) for hi, other_hi in zip(self.hist, other.hist)],
                 'full_comparison': (self.full_comparison or
                                     other.full_comparison),
@@ -2297,16 +2281,15 @@ def __rdiv__(self, other):
 
     @_new_obj
     def __rdiv(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s / %s)" % (other, self.name),
                 #'tex': "{(%s / %s)}" % (other, self.tex),
-                'hist': other / self.hist if is_not_vb else \
+                'hist': other / self.hist if not self.variable_binning else \
                 [other / hi for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "array / %s" % self.name,
                     #'tex': "{(X / %s)}" % self.tex,
@@ -2329,16 +2312,15 @@ def __rsub__(self, other):
 
     @_new_obj
     def __rsub(self, other):
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         if np.isscalar(other) or type(other) is uncertainties.core.Variable:
             state_updates = {
                 #'name': "(%s - %s)" % (other, self.name),
                 #'tex': "{(%s - %s)}" % (other, self.tex),
-                'hist': other - self.hist if is_not_vb else \
+                'hist': other - self.hist if not self.variable_binning else \
                 [other - hi for hi in self.hist],
             }
         elif isinstance(other, np.ndarray):
-            if is_not_vb:
+            if not self.variable_binning:
                 state_updates = {
                     #'name': "(array - %s)" % self.name,
                     #'tex': "{(X - %s)}" % self.tex,
@@ -2360,20 +2342,19 @@ def sqrt(self):
         sqrt_map : Map
 
         """
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
         state_updates = {
             #'name': "sqrt(%s)" % self.name,
             #'tex': r"\sqrt{%s}" % self.tex,
             #'hist': np.asarray(unp.sqrt(self.hist), dtype='float'),
-            'hist': unp.sqrt(self.hist) if is_not_vb else [unp.sqrt(hi) for hi in self.hist],
+            'hist': unp.sqrt(self.hist) if not self.variable_binning else \
+            [unp.sqrt(hi) for hi in self.hist],
         }
         return state_updates
 
     def allclose(self, other):
         '''Check if this map and another have the same (within machine precision) bin counts'''
         self.assert_compat(other)
-        is_not_vb = not isinstance(self.binning, VarMultiDimBinning)
-        if is_not_vb:
+        if not self.variable_binning:
             return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW)
         else:
             return np.all([np.allclose(nv, other_nv, **ALLCLOSE_KW) \

From 22d70683071f900ad0e0f2764597e6c514513814 Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Mon, 4 Nov 2024 02:59:19 -0600
Subject: [PATCH 06/10] added  or  flags to Map for methods where
 VarMultiDimBinning either does not make sense or support is not yet
 implemented; also extended test_Map to include tests with VarMultiDimBinning

---
 pisa/core/binning.py |   2 +-
 pisa/core/map.py     | 172 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/pisa/core/binning.py b/pisa/core/binning.py
index 20f10e554..88e5e6b01 100644
--- a/pisa/core/binning.py
+++ b/pisa/core/binning.py
@@ -3093,7 +3093,7 @@ def serializable_state(self):
             state = OrderedDict()
             state['name'] = self.name
             state['selection'] = self.selection
-            state['binning'] = 'MultiDimBinning(%s)'%self.binning.serializable_state
+            state['binning'] = self.binning.serializable_state
             self._serializable_state = state
         return self._serializable_state
 
diff --git a/pisa/core/map.py b/pisa/core/map.py
index fd8b3395e..14237d52f 100755
--- a/pisa/core/map.py
+++ b/pisa/core/map.py
@@ -30,7 +30,7 @@
 from uncertainties import unumpy as unp
 
 from pisa import ureg, HASH_SIGFIGS
-from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning
+from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning, EventSpecie
 from pisa.utils.comparisons import normQuant, recursiveEquality, ALLCLOSE_KW
 from pisa.utils.flavInt import NuFlavIntGroup
 from pisa.utils.hash import hash_obj
@@ -315,6 +315,15 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None,
                 binning = MultiDimBinning(dimensions=binning)
             elif isinstance(binning, Mapping):
                 binning = MultiDimBinning(**binning)
+                # TODO: figure out file read out for VarMultiDimBinning
+#                 if not 'event_species' in binning.keys():
+#                     binning = MultiDimBinning(**binning)
+#                 else:
+#                     ev_species = [EventSpecie(**es_str) for es_str 
+#                                   in binning['event_species']]
+#                     print(binning['event_species'])
+#                     binning = VarMultiDimBinning(event_species=ev_species,
+#                                                  name=binning['name'])
             else:
                 raise ValueError('Do not know what to do with `binning`=%s of'
                                  ' type %s' %(binning, type(binning)))
@@ -345,9 +354,17 @@ def __repr__(self):
         np.set_printoptions(precision=18)
         try:
             state = self.serializable_state
-            state['hist'] = np.array_repr(state['hist'])
+
+            if not self.variable_binning:
+                state['hist'] = np.array_repr(state['hist'])
+            else:
+                state['hist'] = [np.array_repr(h) for h in state['hist']]
+
             if state['error_hist'] is not None:
-                state['error_hist'] = np.array_repr(state['error_hist'])
+                if not self.variable_binning:
+                    state['error_hist'] = np.array_repr(state['error_hist'])
+                else:
+                    state['error_hist'] = [np.array_repr(eh) for eh in state['error_hist']]
             argstrs = [('%s=%r' % item) for item in
                        self.serializable_state.items()]
             r = '%s(%s)' % (self.__class__.__name__, ',\n    '.join(argstrs))
@@ -361,7 +378,10 @@ def __str__(self):
         state = {a: getattr(self, a) for a in attrs}
         state['name'] = repr(state['name'])
         state['tex'] = repr(state['tex'])
-        state['hist'] = np.array_repr(state['hist'])
+        if not self.variable_binning:
+            state['hist'] = np.array_repr(state['hist'])
+        else:
+            state['hist'] = [np.array_repr(h) for h in state['hist']]
         argstrs = [('%s=%s' % (a, state[a])) for a in attrs]
         s = '%s(%s)' % (self.__class__.__name__, ',\n    '.join(argstrs))
         return s
@@ -404,6 +424,9 @@ def item(self, *args):
         z : Standard Python scalar object
 
         """
+        if self.variable_binning:
+            raise TypeError('item() method not supported for Map with VarMultiDimBinning')
+
         return self.hist.item(*args)
 
     def slice(self, **kwargs):
@@ -473,15 +496,24 @@ def slice(self, **kwargs):
             object's dimensionality.
 
         """
+        if self.variable_binning:
+            raise TypeError('slice() method not supported for Map with VarMultiDimBinning')
+
         return self[self.binning.indexer(**kwargs)]
 
     def set_poisson_errors(self):
         """Approximate poisson errors using sqrt(n)."""
         nom_values = self.nominal_values
-        super().__setattr__(
-            '_hist',
-            unp.uarray(nom_values, np.sqrt(nom_values))
-        )
+        if not self.variable_binning:
+            super().__setattr__(
+                '_hist',
+                unp.uarray(nom_values, np.sqrt(nom_values))
+            )
+        else:
+            super().__setattr__(
+                '_hist',
+                [unp.uarray(nv, np.sqrt(nv)) for nv in nom_values]
+            )
 
     def set_errors(self, error_hist):
         """Manually define the error with an array the same shape as the
@@ -538,6 +570,11 @@ def compare(self, ref):
           * 'infmatch' : bool, whether +inf (and separately -inf) entries match
 
         """
+
+        if self.variable_binning:
+            raise TypeError('compare() method is not implemented for '
+            'Map with VarMultiDimBinning')
+
         assert isinstance(ref, Map)
         assert ref.binning == self.binning
         diff = self - ref
@@ -705,6 +742,11 @@ def plot(self, symm=False, logz=False, vmin=None, vmax=None, backend=None,
         colorbar : :class:`matplotlib.colorbar.Colorbar`
 
         """
+
+        if self.variable_binning:
+            raise TypeError('plot() method is not implemented for '
+            'Map with VarMultiDimBinning')
+
         import matplotlib as mpl
         import matplotlib.pyplot as plt
         from mpl_toolkits.axes_grid1 import make_axes_locatable
@@ -949,6 +991,11 @@ def reorder_dimensions(self, order):
             Modify Map (and its binning) by combining adjacent bins
 
         """
+
+        if self.variable_binning:
+            raise TypeError('reorder_dimensions() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         new_binning = self.binning.reorder_dimensions(order)
         orig_order = list(range(len(self.binning)))
         new_order = [self.binning.index(b, use_basenames=False)
@@ -969,6 +1016,11 @@ def squeeze(self):
         Map with equivalent values but singleton dimensions removed
 
         """
+
+        if self.variable_binning:
+            raise TypeError('squeeze() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         new_binning = self.binning.squeeze()
         new_hist = self.hist.squeeze()
         return {'hist': new_hist, 'binning': new_binning}
@@ -976,9 +1028,13 @@ def squeeze(self):
     @_new_obj
     def round2int(self):
         binning = self.binning
-        nominal_values = np.rint(self.nominal_values)
+        nominal_values = self.nominal_values
         std_devs = self.std_devs
-        return {'hist': unp.uarray(nominal_values, std_devs)}
+        if not self.variable_binning:
+            return {'hist': unp.uarray(np.rint(nominal_values), std_devs)}
+        else:
+            return {'hist': [unp.uarray(np.rint(nv), sd) for nv,sd 
+                             in zip(nominal_values, std_devs)]}
 
     @_new_obj
     def sum(self, axis=None, keepdims=False):
@@ -1008,6 +1064,10 @@ def sum(self, axis=None, keepdims=False):
 
         #TODO This function does't work if axis=None, since @_new_obj expects the output to be a map
 
+        if self.variable_binning:
+            raise TypeError('sum() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         if axis is None:
             axis = self.binning.names
         if isinstance(axis, (string_types, int)):
@@ -1045,6 +1105,11 @@ def project(self, axis, keepdims=False):
         projection : Map
 
         """
+
+        if self.variable_binning:
+            raise TypeError('project() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         keep_index = self.binning.index(axis)
         sum_indices = list(range(len(self.binning.dims)))
         sum_indices.remove(keep_index)
@@ -1076,6 +1141,10 @@ def rebin(self, new_binning):
         """
         # TODO: put uncertainties in
 
+        if self.variable_binning:
+            raise TypeError('rebin() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         assert self.binning.mask is None, "`rebin` function does not currenty support bin masking"
 
         new_hist = rebin(hist=self.hist, orig_binning=self.binning,
@@ -1090,6 +1159,11 @@ def downsample(self, *args, **kwargs):
         details.
 
         """
+
+        if self.variable_binning:
+            raise TypeError('downsample() method not supported for '
+                            'Map with VarMultiDimBinning')
+
         new_binning = self.binning.downsample(*args, **kwargs)
         return self.rebin(new_binning)
 
@@ -1121,6 +1195,11 @@ def fluctuate(self, method, random_state=None, jumpahead=None):
         ..  [1] Bohm & Zech, "Statistics of weighted Poisson events and its applications" (2013),
             https://arxiv.org/abs/1309.1287
         """
+
+        if self.variable_binning:
+            raise TypeError('fluctuate() method not yet implemented for '
+                            'Map with VarMultiDimBinning (TODO)')
+
         orig = method
         method = str(method).strip().lower().replace(' ', '')
         if not method in FLUCTUATE_METHODS:
@@ -1234,17 +1313,25 @@ def fluctuate(self, method, random_state=None, jumpahead=None):
     @property
     def shape(self):
         """tuple : shape of the map, akin to `nump.ndarray.shape`"""
-        return self.hist.shape
+        # return self.hist.shape
+        # any reason to take shape from the hist and not binning for Map with MultiDimBinning?
+        return self.binning.shape
 
     @property
     def size(self):
         """int : total number of elements"""
-        return self.hist.size
+        if not self.variable_binning:
+            return self.hist.size
+        else:
+            return np.sum([h.size for h in self.hist])
 
     @property
     def num_entries(self):
         """int : total number of weighted entries in all bins"""
-        return np.sum(valid_nominal_values(self.hist))
+        if not self.variable_binning:
+            return np.sum(valid_nominal_values(self.hist))
+        else:
+            return np.sum([np.sum(valid_nominal_values(h)) for h in self.hist])
 
     @property
     def serializable_state(self):
@@ -1316,6 +1403,10 @@ def to_json(self, filename, **kwargs):
         pisa.utils.jsons.to_json
 
         """
+        if self.variable_binning:
+            raise TypeError('to_json() method not yet implemented for '
+                            'Map with VarMultiDimBinning (TODO)')
+
         jsons.to_json(self.serializable_state, filename=filename, **kwargs)
 
     @classmethod
@@ -1365,6 +1456,10 @@ def iterbins(self):
         Map object containing one of each bin of this Map
 
         """
+        if self.variable_binning:
+            raise TypeError('iterbins() method is not supported for '
+                            'Map with VarMultiDimBinning')
+
         for i in range(self.size):
             idx_coord = self.binning.index2coord(i)
             idx_view = tuple(slice(x, x+1) for x in idx_coord)
@@ -1382,6 +1477,10 @@ def iterbins(self):
     # TODO : example!
     def itercoords(self):
         """Iterator that yields the coordinate of each bin in the map."""
+        if self.variable_binning:
+            raise TypeError('itercoords() method is not supported for '
+                            'Map with VarMultiDimBinning')
+
         return self.binning.itercoords()
 
     def __hash__(self):
@@ -1406,6 +1505,10 @@ def _slice_or_index(self, idx):
         hist should be).
 
         """
+        if self.variable_binning:
+            raise TypeError('_slice_or_index() method is not supported for '
+                            'Map with VarMultiDimBinning')
+
         new_binning = self.binning[idx]
 
         new_map = Map(name=self.name,
@@ -1473,6 +1576,9 @@ def split(self, dim, bin=None, use_basenames=False, pure_bin_names=False):
             copied into the new map(s).
 
         """
+        if self.variable_binning:
+            raise TypeError('split() method is not supported for '
+                            'Map with VarMultiDimBinning')
 
         dim_index = self.binning.index(dim, use_basenames=use_basenames)
         spliton_dim = self.binning.dims[dim_index]
@@ -3500,6 +3606,46 @@ def test_Map():
         # assert test_return[2] == matplotlib.collections.QuadMesh
         # assert test_return[3] == matplotlib.colorbar.Colorbar
 
+
+    # Test Map with VarMultiDimBinning
+    binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, 
+                                       num_bins=12, domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True, 
+                                       num_bins=10, domain=[-1, 0.])
+                                ])
+    binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, 
+                                       num_bins=12, domain=[5., 80.], units='GeV'),
+                                  dict(name='reco_coszen', is_lin=True,
+                                       num_bins=10, domain=[-1, 0.]),
+                                  dict(name='pid', is_lin=True, 
+                                       bin_edges=[0.0, 0.25, 0.55, 1.0], 
+                                       bin_names=['cascades', 'mixed','tracks'])
+                                ])
+    vb = VarMultiDimBinning([EventSpecie(name='high_ndom', selection='n_hit_doms>20', 
+                                         binning=binning_3d),
+                             EventSpecie(name='low_ndom', selection='n_hit_doms<20', 
+                                         binning=binning_2d),
+                            ])
+
+    hist_vb = [np.ones(sp) for sp in vb.shape]
+    mvb = Map(name='a', hist=hist_vb, error_hist=hist_vb, binning=vb)
+
+    mvb.set_poisson_errors()
+    mvb.set_errors([3*np.ones(sp) for sp in vb.shape])
+    _ = mvb.shape
+    _ = mvb.size
+    _ = mvb.num_entries
+
+    mvb_copy = deepcopy(mvb)
+    assert mvb == mvb_copy
+    assert mvb*2 != mvb_copy
+    assert mvb.sqrt()**2 == mvb_copy
+
+    _ = mvb+mvb_copy
+    _ = mvb-mvb_copy
+    _ = mvb*mvb_copy
+    _ = mvb/mvb_copy
+
     logging.info(str(('<< PASS : test_Map >>')))
 
 

From 659577e81f8de75d7d9b73ead3d3d01d926ea13b Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Tue, 5 Nov 2024 02:53:56 -0600
Subject: [PATCH 07/10] clean up pipeline and container; allow for better
 implenetation of variable binning

---
 pisa/core/container.py    | 18 ++++++------------
 pisa/core/pipeline.py     |  8 +++-----
 pisa/stages/utils/hist.py | 21 ++-------------------
 3 files changed, 11 insertions(+), 36 deletions(-)

diff --git a/pisa/core/container.py b/pisa/core/container.py
index 24adb575a..1590fdf6e 100644
--- a/pisa/core/container.py
+++ b/pisa/core/container.py
@@ -33,7 +33,7 @@ class ContainerSet(object):
 
     containers : list or None
 
-    data_specs : MultiDimBinning, VarMultiDimBinning, "events" or None
+    data_specs : MultiDimBinning, VarMultiDimBinning (partial support), "events" or None
 
     """
     def __init__(self, name, containers=None, data_specs=None):
@@ -306,10 +306,8 @@ def representation(self, representation):
                 for name in representation.names:
                     self.validity[name][key] = True
             elif isinstance(representation, VarMultiDimBinning):
-#                 self.representation = 'events'
                 for bn in representation.binnings:
                     for bn_name in bn.names:
-#                         print(bn_name, key)
                         self.validity[bn_name][key] = True
             elif isinstance(representation, str):
                 if representation not in self.array_representations:
@@ -501,14 +499,11 @@ def get_hist(self, key):
         """Return reshaped data as normal n-dimensional histogram"""
         assert self.is_map, 'Cannot retrieve hists from non-map data'
         
-        # retrieve in case needs translation (except VarMultiDimBinning)
-#         print(key, self.keys)
-        if not isinstance(self.representation, VarMultiDimBinning):
-            self[key]
+        # retrieve in case needs translation
+        self[key]
 
         binning = self.representation
         data = self[key]
-        print(binning, data)
 
         if isinstance(binning, VarMultiDimBinning):
             assert len(binning.species) == len(data)
@@ -561,8 +556,8 @@ def translate(self, key, src_representation):
             # nothing to do
             return    
     
-        from_map = isinstance(src_representation, (MultiDimBinning,VarMultiDimBinning))
-        to_map = isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning))
+        from_map = isinstance(src_representation, MultiDimBinning)
+        to_map = isinstance(dest_representation, MultiDimBinning)
     
         if self.tranlation_modes[key] == 'average':            
             if from_map and to_map:
@@ -664,10 +659,9 @@ def array_to_binned(self, key, src_representation, dest_representation, averaged
         """
         # TODO: make work for n-dim
         logging.trace('Transforming %s array to binned data'%(key))
-#         print('calling container.array_to_binned() method')
         
         assert src_representation in self.array_representations
-        assert isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning))
+        assert isinstance(dest_representation, MultiDimBinning)
         
         if not dest_representation.is_irregular:
             sample = []
diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py
index d68a364a0..52c2badf3 100755
--- a/pisa/core/pipeline.py
+++ b/pisa/core/pipeline.py
@@ -318,8 +318,6 @@ def _init_stages(self):
     def get_outputs(self, output_binning=None, output_key=None):
         """Get MapSet output"""
 
-
-
         self.run()
 
         if output_binning is None:
@@ -330,9 +328,9 @@ def get_outputs(self, output_binning=None, output_key=None):
 
         assert output_binning is not None
 
-        # TODO: figure out how this is messing up variable binning!!!
-#         self.data.representation = output_binning
-#         print(self.data.keys)
+        # avoid unnecessary translation
+        if self.data.representation != output_binning:
+            self.data.representation = output_binning
 
         if isinstance(output_key, tuple):
             assert len(output_key) == 2
diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py
index bb6aa5585..a1c8bdab8 100644
--- a/pisa/stages/utils/hist.py
+++ b/pisa/stages/utils/hist.py
@@ -48,9 +48,6 @@ def setup_function(self):
             "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode
         )
 
-#         for container in self.data:
-#             print('initial', container.keys)
-
         self.variable_binning = False
         if isinstance(self.apply_mode, VarMultiDimBinning):
             self.variable_binning = True
@@ -134,14 +131,12 @@ def setup_function(self):
                 )
             else:
                 # TODO: deal with repeat code
-                print('calculating using var binning!!!')
                 species = []
                 for specie in self.apply_mode.species:
                     dimensions = []
                     for dim in specie.binning:
                         if dim.is_irregular:
                             varname = dim.name + "__" + specie.name + "_" + "_idx"
-                            print(varname)
                             new_dim = OneDimBinning(
                                 varname, domain=[0, dim.num_bins], num_bins=dim.num_bins
                             )
@@ -170,15 +165,9 @@ def setup_function(self):
         else:
             raise ValueError(f"unknown calc mode: {self.calc_mode}")
 
-#         for container in self.data:
-#             print('setup_final', container.keys)
-
     @profile
     def apply_function(self):
 
-#         for container in self.data:
-#             print('apply_initial', container.keys)
-
         if isinstance(self.calc_mode, MultiDimBinning):
 
             if self.unweighted:
@@ -212,9 +201,7 @@ def apply_function(self):
 
         elif self.calc_mode == "events":
             for container in self.data:
-#                 print(container.keys)
                 container.representation = self.calc_mode
-                print(container.keys)
                 hists = []
                 errors = []
                 bin_unc2s = []
@@ -225,13 +212,11 @@ def apply_function(self):
                         sel_mask = np.ones_like(container["weights"])
                     for var_name in container.keys: #TODO add check that masks don't overlap
                         # using word boundary \b to replace whole words only
-                        print(type(var_name), var_name)
                         selection = re.sub(
                             r'\b{}\b'.format(var_name),
                             'container["%s"]' % (var_name),
                             selection
                         )
-                    print(selection)
                     sel_mask = eval(selection)  # pylint: disable=eval-used
                     
                     sample = []
@@ -273,18 +258,16 @@ def apply_function(self):
                         MultiDimBinning(reg_specie.binning),
                         averaged=False
                     )
-#                     print(hist)
                     hists.append(hist)
 
                     if self.error_method == "sumw2":
                         errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights),
                             reg_specie.binning, averaged=False)) )
-                        bin_unc2s.append(histogram(sample, np.square(unc_weights)*weights,
-                            reg_specie.binning, averaged=False))
+                        bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights,
+                            reg_specie.binning, averaged=False) )
 
                 container.representation = self.apply_mode
                 container["weights"] = self.apply_mode, hists
-                print(container)
 
                 if self.error_method == "sumw2":
                     container["errors"] = self.apply_mode, errors

From e07b0469288c695a1e0da6d6a750d2ba31a0b15d Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Sat, 9 Nov 2024 19:37:48 -0600
Subject: [PATCH 08/10] fix MultiDimBinning handling in hist stage (a bit of
 duplicate code for now but leaving this for later); also fix selection=None
 handling in config_parser

---
 pisa/stages/utils/hist.py   | 145 +++++++++++++++++++++++++-----------
 pisa/utils/config_parser.py |   3 +-
 2 files changed, 103 insertions(+), 45 deletions(-)

diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py
index a1c8bdab8..05e3a86fa 100644
--- a/pisa/stages/utils/hist.py
+++ b/pisa/stages/utils/hist.py
@@ -124,8 +124,7 @@ def setup_function(self):
                         dimensions.append(dim)
                 # simplify the code by making it a VarMultiDimBinning ...
                 # self.regularized_apply_mode = MultiDimBinning(dimensions)
-                self.regularized_apply_mode = VarMultiDimBinning([EventSpecie(
-                    binning=MultiDimBinning(dimensions))])
+                self.regularized_apply_mode = MultiDimBinning(dimensions)
                 logging.debug(
                     "Using regularized binning:\n" + str(self.regularized_apply_mode)
                 )
@@ -200,53 +199,37 @@ def apply_function(self):
                     container["bin_unc2"] = bin_unc2
 
         elif self.calc_mode == "events":
-            for container in self.data:
-                container.representation = self.calc_mode
-                hists = []
-                errors = []
-                bin_unc2s = []
-                for specie, reg_specie in zip(self.apply_mode.species,
-                                              self.regularized_apply_mode):
-                    selection = specie.selection
-                    if selection is None:
-                        sel_mask = np.ones_like(container["weights"])
-                    for var_name in container.keys: #TODO add check that masks don't overlap
-                        # using word boundary \b to replace whole words only
-                        selection = re.sub(
-                            r'\b{}\b'.format(var_name),
-                            'container["%s"]' % (var_name),
-                            selection
-                        )
-                    sel_mask = eval(selection)  # pylint: disable=eval-used
-                    
+            if not self.variable_binning:
+                for container in self.data:
+                    container.representation = self.calc_mode
+
                     sample = []
-                    dims_log = [d.is_log for d in specie.binning]
-                    dims_ire = [d.is_irregular for d in specie.binning]
+                    dims_log = [d.is_log for d in self.apply_mode]
+                    dims_ire = [d.is_irregular for d in self.apply_mode]
                     for dim, is_log, is_ire in zip(
-                        reg_specie.binning, dims_log, dims_ire
+                        self.regularized_apply_mode, dims_log, dims_ire
                     ):
-                        
                         if is_log and not is_ire:
                             container.representation = "log_events"
-                            sample.append(container[dim.name][sel_mask])
+                            sample.append(container[dim.name])
                         else:
                             container.representation = "events"
-                            sample.append(container[dim.name][sel_mask])
+                            sample.append(container[dim.name])
 
                     if self.unweighted:
                         if "astro_weights" in container.keys:
                             weights = np.ones_like(
-                                container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                                container["weights"] + container["astro_weights"]
                             )
                         else:
-                            weights = np.ones_like(container["weights"][sel_mask])
+                            weights = np.ones_like(container["weights"])
                     else:
                         if "astro_weights" in container.keys:
-                            weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                            weights = container["weights"] + container["astro_weights"]
                         else:
-                            weights = container["weights"][sel_mask]
+                            weights = container["weights"]
                     if self.apply_unc_weights:
-                        unc_weights = container["unc_weights"][sel_mask]
+                        unc_weights = container["unc_weights"]
                     else:
                         unc_weights = np.ones(weights.shape)
 
@@ -254,21 +237,95 @@ def apply_function(self):
                     # and regular
                     hist = histogram(
                         sample,
-                        (unc_weights*weights),
-                        MultiDimBinning(reg_specie.binning),
+                        unc_weights*weights,
+                        self.regularized_apply_mode,
                         averaged=False
                     )
-                    hists.append(hist)
 
                     if self.error_method == "sumw2":
-                        errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights),
-                            reg_specie.binning, averaged=False)) )
-                        bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights,
-                            reg_specie.binning, averaged=False) )
+                        sumw2 = histogram(sample, np.square(unc_weights*weights),
+                            self.regularized_apply_mode, averaged=False)
+                        bin_unc2 = histogram(sample, np.square(unc_weights)*weights,
+                            self.regularized_apply_mode, averaged=False)
 
-                container.representation = self.apply_mode
-                container["weights"] = self.apply_mode, hists
+                    container.representation = self.apply_mode
+                    container["weights"] = hist
 
-                if self.error_method == "sumw2":
-                    container["errors"] = self.apply_mode, errors
-                    container["bin_unc2"] = self.apply_mode, bin_unc2s
+                    if self.error_method == "sumw2":
+                        container["errors"] = np.sqrt(sumw2)
+                        container["bin_unc2"] = bin_unc2
+            else:
+                for container in self.data:
+                    container.representation = self.calc_mode
+                    hists = []
+                    errors = []
+                    bin_unc2s = []
+                    for specie, reg_specie in zip(self.apply_mode.species,
+                                                  self.regularized_apply_mode):
+                        selection = specie.selection
+
+                        if selection is None:
+                            sel_mask = np.full(container["weights"].shape, True)
+                        else:
+                            for var_name in container.keys: #TODO add check that masks don't overlap
+                                # using word boundary \b to replace whole words only
+                                selection = re.sub(
+                                    r'\b{}\b'.format(var_name),
+                                    'container["%s"]' % (var_name),
+                                    selection
+                                )
+                            sel_mask = eval(selection)  # pylint: disable=eval-used
+
+                        sample = []
+                        dims_log = [d.is_log for d in specie.binning]
+                        dims_ire = [d.is_irregular for d in specie.binning]
+                        for dim, is_log, is_ire in zip(
+                            reg_specie.binning, dims_log, dims_ire
+                        ):
+
+                            if is_log and not is_ire:
+                                container.representation = "log_events"
+                                sample.append(container[dim.name][sel_mask])
+                            else:
+                                container.representation = "events"
+                                sample.append(container[dim.name][sel_mask])
+
+                        if self.unweighted:
+                            if "astro_weights" in container.keys:
+                                weights = np.ones_like(
+                                    container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                                )
+                            else:
+                                weights = np.ones_like(container["weights"][sel_mask])
+                        else:
+                            if "astro_weights" in container.keys:
+                                weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask]
+                            else:
+                                weights = container["weights"][sel_mask]
+                        if self.apply_unc_weights:
+                            unc_weights = container["unc_weights"][sel_mask]
+                        else:
+                            unc_weights = np.ones(weights.shape)
+
+                        # The hist is now computed using a binning that is completely linear
+                        # and regular
+                        hist = histogram(
+                            sample,
+                            (unc_weights*weights),
+                            MultiDimBinning(reg_specie.binning),
+                            averaged=False
+                        )
+                        hists.append(hist)
+
+                        if self.error_method == "sumw2":
+                            errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights),
+                                reg_specie.binning, averaged=False)) )
+                            bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights,
+                                reg_specie.binning, averaged=False) )
+
+                    container.representation = self.apply_mode
+                    container["weights"] = self.apply_mode, hists
+
+                    if self.error_method == "sumw2":
+                        container["errors"] = self.apply_mode, errors
+                        container["bin_unc2"] = self.apply_mode, bin_unc2s
diff --git a/pisa/utils/config_parser.py b/pisa/utils/config_parser.py
index 1ef2b8db8..d9d529ee2 100644
--- a/pisa/utils/config_parser.py
+++ b/pisa/utils/config_parser.py
@@ -608,7 +608,6 @@ def parse_pipeline_config(config):
             "Could not find 'binning'. Only found sections: %s"
             % config.sections()
         )
-
     # Create binning objects
     binning_dict = {}
     # Loop over binning lines
@@ -683,6 +682,8 @@ def parse_pipeline_config(config):
                         "should be defined after MultiDimBinnings used in their definition.",
                         binning
                     )
+                if sel.strip() == 'None':
+                    sel = None
                 species.append(EventSpecie(name=specie_name, selection=sel, 
                                            binning=binning_dict[bin_def]))
             binning_dict[binning] = VarMultiDimBinning(name=binning, event_species=species)

From 4a5fae50e6cc3fc1a661d06f2200ba5b061b32bd Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Sat, 9 Nov 2024 20:14:45 -0600
Subject: [PATCH 09/10] fix typo

---
 pisa/core/binning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pisa/core/binning.py b/pisa/core/binning.py
index 88e5e6b01..114cce11d 100644
--- a/pisa/core/binning.py
+++ b/pisa/core/binning.py
@@ -3389,7 +3389,7 @@ def __getitem__(self, index):
                     raise ValueError(
                         "Dimension '%s': bin index %s is invalid. Bin index"
                         " must be >= %+d and <= %+d"
-                        %(self.name, bin_index, -mylen, mylen-1)
+                        %(self.name, sp_index, -mylen, mylen-1)
                     )
                 new_species.append(deepcopy(self._species[sp_index]))
         else:

From c2a89f3cad0f07ce02216d3f958b434b1eed84d1 Mon Sep 17 00:00:00 2001
From: Maria <marialiubarska@gmail.com>
Date: Mon, 11 Nov 2024 18:59:45 -0600
Subject: [PATCH 10/10] fix leftover old var binning flag in map

---
 pisa/core/map.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pisa/core/map.py b/pisa/core/map.py
index 14237d52f..187564f1b 100755
--- a/pisa/core/map.py
+++ b/pisa/core/map.py
@@ -1681,7 +1681,7 @@ def llh(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.llh(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.llh(actual_values=hi, expected_values=exp_val_hi))\
@@ -1715,7 +1715,7 @@ def mcllh_mean(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.mcllh_mean(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.mcllh_mean(actual_values=hi, expected_values=exp_val_hi))\
@@ -1750,7 +1750,7 @@ def mcllh_eff(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.mcllh_eff(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.mcllh_eff(actual_values=hi, expected_values=exp_val_hi))\
@@ -1784,7 +1784,7 @@ def conv_llh(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.conv_llh(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.conv_llh(actual_values=hi, expected_values=exp_val_hi))\
@@ -1824,7 +1824,7 @@ def barlow_llh(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.barlow_llh(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.barlow_llh(actual_values=hi, expected_values=exp_val_hi))\
@@ -1859,7 +1859,7 @@ def mod_chi2(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.mod_chi2(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.mod_chi2(actual_values=hi, expected_values=exp_val_hi))\
@@ -1893,7 +1893,7 @@ def correct_chi2(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.correct_chi2(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.correct_chi2(actual_values=hi, expected_values=exp_val_hi))\
@@ -1927,7 +1927,7 @@ def chi2(self, expected_values, binned=False):
                                  expected_values=exp_val_hi)\
                         for hi, exp_val_hi in zip(self.hist, expected_values)]
 
-        if is_not_vb:
+        if not self.variable_binning:
             return np.sum(stats.chi2(actual_values=self.hist,
                                     expected_values=expected_values))
         return np.sum([np.sum(stats.chi2(actual_values=hi, expected_values=exp_val_hi))\