From 80beb26716f4548387613fca460f6cdad5809dfe Mon Sep 17 00:00:00 2001 From: Maria Date: Sun, 27 Oct 2024 22:33:31 -0500 Subject: [PATCH 01/10] implementation of variable multidimantional binning, expanding definition of the Map object to allow also contain variable binning histograms --- pisa/core/binning.py | 300 ++++++++++++++++++++++++++++++ pisa/core/container.py | 19 +- pisa/core/map.py | 412 +++++++++++++++++++++++++++++------------ pisa/core/pipeline.py | 4 +- 4 files changed, 604 insertions(+), 131 deletions(-) diff --git a/pisa/core/binning.py b/pisa/core/binning.py index f2849d055..bfc5e2330 100644 --- a/pisa/core/binning.py +++ b/pisa/core/binning.py @@ -3042,6 +3042,306 @@ def __len__(self): def __ne__(self, other): return not self.__eq__(other) +class EventSpecie(object): + # pylint: disable=line-too-long + r""" + Definition of an EventSpecie object. An EventSpecie object has following properties: + + * name : name of the specie + + * selection : selection criteria (e.g. cuts, flags) + + * binning : binning to use for this specie of events + + """ + # pylint: enable=line-too-long + def __init__(self, binning, name=None, selection=None): + self.name = name + self.selection = selection # TODO: format? + self.binning = binning # sould be MultiDimBinning (maybe OneDim too?) + # TODO: add checks for proper format, type + self._serializable_state = None + + @property + def serializable_state(self): + """OrderedDict containing savable state attributes""" + if self._serializable_state is None: + state = OrderedDict() + state['name'] = self.name + state['selection'] = self.selection + state['binning'] = self.binning.serializable_state + self._serializable_state = state + # Since the tex property can be modified, must set every time this + # property is called + return self._serializable_state + + @property + def hashable_state(self): + """OrderedDict containing savable state attributes""" + return self.serializable_state + + def __repr__(self): + previous_precision = np.get_printoptions()['precision'] + np.set_printoptions(precision=18) + try: + argstrs = [('%s=%r' %item) for item in + self.serializable_state.items()] + r = '%s(%s)' %(self.__class__.__name__, ',\n '.join(argstrs)) + finally: + np.set_printoptions(precision=previous_precision) + return r + +class VarMultiDimBinning(object): + # pylint: disable=line-too-long + r""" + Multi-dimentional binning with possibility of splitting sample is several event types + + """ + # pylint: enable=line-too-long + + _attrs_to_create_new = ('name', 'tex', 'bin_edges', 'is_log', 'is_lin', 'bin_names') + + def __init__(self, event_species, name=None, mask=None): + + if isinstance(event_species, MultiDimBinning): + species = [EventSpecie(name='all', selection=None, binning=event_species)] + elif isinstance(event_species, Sequence): + species = [] + for obj_num, obj in enumerate(event_species): + if isinstance(obj, EventSpecie): + species.append(obj) + elif isinstance(obj, MultiDimBinning): + species.append(EventSpecie(name='specie '+str(obj_num), + selection=None, binning=obj)) + else: + TypeError('Argument/object #%d unhandled type: %s'%(obj_num, type(obj))) + + self._species = tuple(species) + self._name = name + self._names = None + self._binnings = None + self._selections = None + self._shape = None + self._hashable_state = None + + self.mask = None # not implemented, here so Map does not complain + + @property + def shape(self): + """Variable binning shape. Defined as ...""" + if self._shape is None: + self._shape = [b.shape for b in self.binnings] + return self._shape + + @property + def name(self): + """Name of the species""" + return self._name + + @property + def species(self): + """list of the species""" + return self._species + + @property + def names(self): + """list of strings : names of each dimension contained""" + if self._names is None: + self._names = [specie.name for specie in self._species] + return self._names + + @property + def binnings(self): + """list of MultiDimBinning : binnings of each dimension contained""" + if self._binnings is None: + self._binnings = [specie.binning for specie in self._species] + return self._binnings + + @property + def selections(self): + """list of strings : selections of each dimension contained""" + if self._selections is None: + self._selections = [specie.selection for specie in self._species] + return self._selections + + @property + def serializable_state(self): + + d = OrderedDict({'event_species': [d.serializable_state for d in self]}) + d['name'] = self.name + return d + + @property + def hashable_state(self): + """Everything necessary to fully describe this object's state. Note + that objects may be returned by reference, so to prevent external + modification, the user must call deepcopy() separately on the returned + OrderedDict. + + Returns + ------- + state : OrderedDict that can be passed to instantiate a new + VarMultiDimBinning via VarMultiDimBinning(**state) + + """ + if self._hashable_state is None: + + state = OrderedDict() + # TODO: Shouldn't order matter? + #state['dimensions'] = [self[name]._hashable_state + # for name in sorted(self.names)] + state['event_species'] = [d.hashable_state for d in self.species] + state['name'] = self.name + + self._hashable_state = state + + return self._hashable_state + + def assert_array_fits(self, array): + """Check if a + + """ + if len(array) != len(self.shape): + raise ValueError( + 'Array length %s does not match variable binning length %s' + % (len(array), len(self.shape)) + ) + for sp, array_el in zip(self.shape, array): + if array_el.shape != sp: + raise ValueError( + 'Array shape %s does not match binning shape %s' + % (array_el.shape, sp) + ) + + def __repr__(self): + previous_precision = np.get_printoptions()['precision'] + np.set_printoptions(precision=18) + try: + argstrs = [('%s=%r' %item) for item in + self.serializable_state.items()] + r = '%s(%s)' %(self.__class__.__name__, ',\n '.join(argstrs)) + finally: + np.set_printoptions(precision=previous_precision) + return r + + def __add__(self, other): + other = VarMultiDimBinning(other) + return VarMultiDimBinning(chain(self, other)) + + def __mul__(self, other): + if isinstance(other, MultiDimBinning): + other = [EventSpecie(name='all', selection=None, binning=other)] + other = VarMultiDimBinning(other) + return VarMultiDimBinning(chain(self, other)) + + def __getitem__(self, index): + """Return a new EventSpecie(s), sub-selected by `index`. + + Parameters + ---------- + index : int, slice, str + if str has to be a name of event specie + + Returns + ------- + A new EventSpecie selected by `index`. + + """ + + sp_names = self.names + mylen = len(sp_names) + orig_index = index + + # indexing by name + if isinstance(index, str): + assert sp_names is not None + index = sp_names.index(index) + return deepcopy(self._species[index]) + + # indexing by int + if isinstance(index, int): + return deepcopy(self._species[index]) + + # indexing by slice + if isinstance(index, slice): + index = list(range(*index.indices(mylen))) + + if isinstance(index, Iterable): + if not isinstance(index, Sequence): + index = list(index) + for sp_index in index: + if isinstance(sp_index, str): + raise ValueError('Slicing by seq of names currently not' + ' supported') + if not index: + raise ValueError('`index` "%s" results in no species being' + ' specified.' %orig_index) + if len(index) > 1 and not np.all(np.diff(index) == 1): + raise ValueError('Indices must be monotonically' + ' increasing and adjacent.') + new_species = [] +# new_names = [] +# new_selections = [] +# new_binnings = [] + for sp_index in index: + if sp_index < -mylen or sp_index >= mylen: + raise ValueError( + "Dimension '%s': bin index %s is invalid. Bin index" + " must be >= %+d and <= %+d" + %(self.name, bin_index, -mylen, mylen-1) + ) + new_species.append(deepcopy(self._species[sp_index])) +# new_names.append(self._names[sp_index]) +# new_selections.append(self._selections[sp_index]) +# new_binnings.append(self._binnings[sp_index]) + else: + raise TypeError('Unhandled index type %s' %type(orig_index)) + + return new_species + + +# def __repr__(): # TODO + +# @property +# def midpoints(self): +# """Return a list of the contained dimensions' midpoints""" +# return [d.midpoints for d in self] + +# @property +# def weighted_centers(self): +# """Return a list of the contained dimensions' weighted_centers (e.g. +# equidistant from bin edges on logarithmic scale, if the binning is +# logarithmic; otherwise linear). Access `midpoints` attribute for +# always-linear alternative.""" +# return [d.weighted_centers for d in self] + +# @property +# def num_bins(self): +# """ +# Return a list of the contained dimensions' num_bins. +# Note that this does not accpunt for any bin mask (since it is computed per dimension) +# """ +# return [d.num_bins for d in self] + +# @property +# def tot_num_bins(self): +# """ +# Return total number of bins. +# If a bin mask is used, this will only count bins that are not masked off +# """ +# if self.mask is None : +# return np.product(self.shape) +# else : +# return np.sum(self.mask.astype(int)) + +# @property +# def units(self): +# """list : Return a list of the contained dimensions' units""" +# return [d.units for d in self] + +# def index(self, dim, use_basenames=False): + + def test_OneDimBinning(): """Unit tests for OneDimBinning class""" diff --git a/pisa/core/container.py b/pisa/core/container.py index a98895167..f1802ca84 100644 --- a/pisa/core/container.py +++ b/pisa/core/container.py @@ -15,7 +15,7 @@ import numpy as np from pisa import FTYPE -from pisa.core.binning import OneDimBinning, MultiDimBinning +from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning from pisa.core.map import Map, MapSet from pisa.core.translation import histogram, lookup, resample from pisa.utils.comparisons import ALLCLOSE_KW @@ -33,7 +33,7 @@ class ContainerSet(object): containers : list or None - data_specs : MultiDimBinning, "events" or None + data_specs : MultiDimBinning, VarMultiDimBinning, "events" or None """ def __init__(self, name, containers=None, data_specs=None): @@ -67,7 +67,7 @@ def representation(self, representation): """ Parameters ---------- - representation : str, MultiDimBinning or any hashable object + representation : str, MultiDimBinning, VarMultiDimBinning or any hashable object Data specs should be set to retreive the right representation i.e. the representation one is working in at the moment @@ -302,7 +302,7 @@ def representation(self, representation): key = hash(representation) if not key in self.representation_keys: self._representations[key] = representation - if isinstance(representation, MultiDimBinning): + if isinstance(representation, (MultiDimBinning,VarMultiDimBinning)): for name in representation.names: self.validity[name][key] = True elif isinstance(representation, str): @@ -355,7 +355,7 @@ def all_keys(self): @property def is_map(self): '''Is current representation a map/grid''' - return isinstance(self.representation, MultiDimBinning) + return isinstance(self.representation, (MultiDimBinning,VarMultiDimBinning)) def mark_changed(self, key): '''mark a key as changed and only what is in the current representation is valid''' @@ -415,7 +415,7 @@ def __add_data(self, key, data): elif isinstance(data, Sequence) and len(data) == 2: binning, array = data - assert isinstance(binning, MultiDimBinning) + assert isinstance(binning, (MultiDimBinning,VarMultiDimBinning)) assert hash(self.representation) == hash(binning) @@ -516,8 +516,8 @@ def translate(self, key, src_representation): # nothing to do return - from_map = isinstance(src_representation, MultiDimBinning) - to_map = isinstance(dest_representation, MultiDimBinning) + from_map = isinstance(src_representation, (MultiDimBinning,VarMultiDimBinning)) + to_map = isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning)) if self.tranlation_modes[key] == 'average': if from_map and to_map: @@ -619,9 +619,10 @@ def array_to_binned(self, key, src_representation, dest_representation, averaged """ # TODO: make work for n-dim logging.trace('Transforming %s array to binned data'%(key)) +# print('calling container.array_to_binned() method') assert src_representation in self.array_representations - assert isinstance(dest_representation, MultiDimBinning) + assert isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning)) if not dest_representation.is_irregular: sample = [] diff --git a/pisa/core/map.py b/pisa/core/map.py index 55e9f189f..fa7af4089 100755 --- a/pisa/core/map.py +++ b/pisa/core/map.py @@ -30,7 +30,7 @@ from uncertainties import unumpy as unp from pisa import ureg, HASH_SIGFIGS -from pisa.core.binning import OneDimBinning, MultiDimBinning +from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning from pisa.utils.comparisons import normQuant, recursiveEquality, ALLCLOSE_KW from pisa.utils.flavInt import NuFlavIntGroup from pisa.utils.hash import hash_obj @@ -201,7 +201,7 @@ def new_function(*args, **kwargs): new_state[slot] = state_updates[slot] else: new_state[slot] = deepcopy(getattr(self, slot)) - if len(new_state['binning']) == 0: + if not isinstance(new_state['binning'], VarMultiDimBinning) and len(new_state['binning']) == 0: return new_state['hist'] return Map(**new_state) return decorate(original_function, new_function) @@ -308,7 +308,8 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None, super().__setattr__('_hash', hash) super().__setattr__('_full_comparison', full_comparison) - if not isinstance(binning, MultiDimBinning): + # TODO: add further checks for VarMultiDimBinning case + if not isinstance(binning, (MultiDimBinning, VarMultiDimBinning)): if isinstance(binning, Sequence): binning = MultiDimBinning(dimensions=binning) elif isinstance(binning, Mapping): @@ -318,12 +319,22 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None, ' type %s' %(binning, type(binning))) self.parent_indexer = None +# if isinstance(binning, VarMultiDimBinning): +# self.varbinning = True +# else: +# self.varbinning = False + # Do the work here to set read-only attributes super().__setattr__('_binning', binning) binning.assert_array_fits(hist) - super().__setattr__( - '_hist', np.ascontiguousarray(hist) - ) + if not isinstance(binning, VarMultiDimBinning): + super().__setattr__( + '_hist', np.ascontiguousarray(hist) + ) + else: + super().__setattr__( + '_hist', [np.ascontiguousarray(hi) for hi in hist] + ) if error_hist is not None: self.set_errors(error_hist) self._normalize_values = True @@ -1548,12 +1559,21 @@ def llh(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.llh(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.llh(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.llh(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.llh(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.llh(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.llh(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def mcllh_mean(self, expected_values, binned=False): """Calculate the total LMean log-likelihood value between this map and the @@ -1574,12 +1594,21 @@ def mcllh_mean(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.mcllh_mean(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.mcllh_mean(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.mcllh_mean(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.mcllh_mean(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.mcllh_mean(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.mcllh_mean(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def mcllh_eff(self, expected_values, binned=False): @@ -1601,12 +1630,21 @@ def mcllh_eff(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.mcllh_eff(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.mcllh_eff(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.mcllh_eff(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.mcllh_eff(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.mcllh_eff(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.mcllh_eff(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def conv_llh(self, expected_values, binned=False): """Calculate the total convoluted log-likelihood value between this map @@ -1627,12 +1665,21 @@ def conv_llh(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.conv_llh(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.conv_llh(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.conv_llh(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.conv_llh(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.conv_llh(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.conv_llh(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def barlow_llh(self, expected_values, binned=False): """Calculate the total barlow log-likelihood value between this map and @@ -1659,12 +1706,22 @@ def barlow_llh(self, expected_values, binned=False): elif isinstance(expected_values, Iterable): expected_values = [reduceToHist(x) for x in expected_values] + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.barlow_llh(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.barlow_llh(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.barlow_llh(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] + + if is_not_vb: + return np.sum(stats.barlow_llh(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.barlow_llh(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) - return np.sum(stats.barlow_llh(actual_values=self.hist, - expected_values=expected_values)) def mod_chi2(self, expected_values, binned=False): """Calculate the total modified chi2 value between this map and the map @@ -1685,12 +1742,21 @@ def mod_chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.mod_chi2(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.mod_chi2(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.mod_chi2(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.mod_chi2(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.mod_chi2(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.mod_chi2(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def correct_chi2(self, expected_values, binned=False): """Calculate the total correct chi2 value between this map and the map @@ -1711,12 +1777,21 @@ def correct_chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.correct_chi2(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.correct_chi2(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.correct_chi2(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.correct_chi2(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.correct_chi2(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.correct_chi2(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def chi2(self, expected_values, binned=False): """Calculate the total chi-squared value between this map and the map @@ -1737,12 +1812,21 @@ def chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - return stats.chi2(actual_values=self.hist, - expected_values=expected_values) + if is_not_vb: + return stats.chi2(actual_values=self.hist, + expected_values=expected_values) + else: + return [stats.chi2(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] - return np.sum(stats.chi2(actual_values=self.hist, - expected_values=expected_values)) + if is_not_vb: + return np.sum(stats.chi2(actual_values=self.hist, + expected_values=expected_values)) + return np.sum([np.sum(stats.chi2(actual_values=hi, expected_values=exp_val_hi))\ + for hi, exp_val_hi in zip(self.hist, expected_values)]) def signed_sqrt_mod_chi2(self, expected_values): """Calculate the binwise (signed) square-root of the modified chi2 value @@ -1761,8 +1845,13 @@ def signed_sqrt_mod_chi2(self, expected_values): """ expected_values = reduceToHist(expected_values) - return stats.signed_sqrt_mod_chi2(actual_values=self.hist, - expected_values=expected_values) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) + if is_not_vb: + return stats.chi2(actual_values=self.hist, + expected_values=expected_values) + return [stats.chi2(actual_values=hi, + expected_values=exp_val_hi)\ + for hi, exp_val_hi in zip(self.hist, expected_values)] def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned=False): @@ -1785,6 +1874,9 @@ def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned= llh_per_bin = stats.generalized_poisson_llh(actual_values=self.hist, expected_values=expected_values, empty_bins=empty_bins) + #TODO: implement for VarMultiDimBinning + if isinstance(self.binning, VarMultiDimBinning): + raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning") if binned: return llh_per_bin @@ -1869,7 +1961,10 @@ def hist(self): # Apply bin mask, if one exists # Set masked off elements to NaN (handling both cases where the hst is either a simple array, or has uncertainties) if self.binning.mask is not None : - hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray? + if not isinstance(self.binning, VarMultiDimBinning): + hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray? + else: + raise ValueError("Masking for Maps with Variable binning is not implemented") # Done return hist @@ -1877,12 +1972,18 @@ def hist(self): @property def nominal_values(self): """numpy.ndarray : Bin values stripped of uncertainties""" - return unp.nominal_values(self.hist) + if not isinstance(self.binning, VarMultiDimBinning): + return unp.nominal_values(self.hist) + else: + return [unp.nominal_values(hi) for hi in self.hist] @property def std_devs(self): """numpy.ndarray : Uncertainties (standard deviations) per bin""" - return unp.std_devs(self.hist) + if not isinstance(self.binning, VarMultiDimBinning): + return unp.std_devs(self.hist) + else: + return [unp.std_devs(hi) for hi in self.hist] @property def binning(self): @@ -1903,33 +2004,42 @@ def full_comparison(self, value): @_new_obj def __abs__(self): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "|%s|" % (self.name,), #'tex': r"{\left| %s \right|}" % strip_outer_parens(self.tex), - 'hist': np.abs(self.hist) + 'hist': np.abs(self.hist) if is_not_vb else \ + [np.abs(hi) for hi in self.hist] } return state_updates @_new_obj def __add__(self, other): """Add `other` to self""" + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s + %s)" % (self.name, other), #'tex': r"{(%s + %s)}" % (self.tex, other), - 'hist': self.hist + other + 'hist': self.hist + other if is_not_vb else \ + [hi+other for hi in self.hist], } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "(%s + array)" % self.name, - #'tex': r"{(%s + X)}" % self.tex, - 'hist': self.hist + other - } + if is_not_vb: + state_updates = { + #'name': "(%s + array)" % self.name, + #'tex': r"{(%s + X)}" % self.tex, + 'hist': self.hist + other + } + else: + raise ValueError("Addition of numpy.ndarray is not supported \ + for Map with VarMultiDimBinning") elif isinstance(other, Map): state_updates = { #'name': "(%s + %s)" % (self.name, other.name), #'tex': r"{(%s + %s)}" % (self.tex, other.tex), - 'hist': self.hist + other.hist, + 'hist': self.hist + other.hist if is_not_vb else \ + [hi+other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), } @@ -1939,25 +2049,66 @@ def __add__(self, other): #def __cmp__(self, other): + + @_new_obj + def __sub__(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) + if np.isscalar(other) or type(other) is uncertainties.core.Variable: + state_updates = { + #'name': "(%s - %s)" % (self.name, other), + #'tex': "{(%s - %s)}" % (self.tex, other), + 'hist': self.hist - other if is_not_vb else \ + [hi-other for hi in self.hist], + } + elif isinstance(other, np.ndarray): + if is_not_vb: + state_updates = { + #'name': "(%s - array)" % self.name, + #'tex': "{(%s - X)}" % self.tex, + 'hist': self.hist - other, + } + else: + raise ValueError("Substraction of numpy.ndarray is not supported for \ + Map with VarMultiDimBinning") + elif isinstance(other, Map): + state_updates = { + #'name': "%s - %s" % (self.name, other.name), + #'tex': "{(%s - %s)}" % (self.tex, other.tex), + 'hist': self.hist - other.hist if is_not_vb else \ + [hi-other_hi for hi, other_hi in zip(self.hist,other.hist)], + 'full_comparison': (self.full_comparison or + other.full_comparison), + } + else: + type_error(other) + return state_updates + @_new_obj def __div__(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s / %s)" % (self.name, other), #'tex': r"{(%s / %s)}" % (self.tex, other), - 'hist': self.hist / other + 'hist': self.hist / other if is_not_vb else \ + [hi/other for hi in self.hist] } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "(%s / array)" % self.name, - #'tex': r"{(%s / X)}" % self.tex, - 'hist': self.hist / other - } + if is_not_vb: + state_updates = { + #'name': "(%s / array)" % self.name, + #'tex': r"{(%s / X)}" % self.tex, + 'hist': self.hist / other + } + else: + raise ValueError("Division by numpy.ndarray is not supported \ + for Map with VarMultiDimBinning") elif isinstance(other, Map): state_updates = { #'name': "(%s / %s)" % (self.name, other.name), #'tex': r"{(%s / %s)}" % (self.tex, other.tex), - 'hist': self.hist / other.hist, + 'hist': self.hist / other.hist if is_not_vb else \ + [hi/other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), } @@ -1968,6 +2119,9 @@ def __div__(self, other): def __truediv__(self, other): return self.__div__(other) + def __rtruediv__(self, other): + return self.__rdiv__(other) + def __floordiv__(self, other): raise NotImplementedError('floordiv not implemented for type Map') @@ -1985,15 +2139,23 @@ def __eq__(self, other): Otherwise, simply checks that the hashes are equal. """ + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other): - return np.all(self.nominal_values == other) + if is_not_vb: + return np.all(self.nominal_values == other) + else: + return np.all([np.all(nv==other) for nv in self.nominal_values]) if type(other) is uncertainties.core.Variable \ or isinstance(other, np.ndarray): - return (np.all(self.nominal_values - == unp.nominal_values(other)) - and np.all(self.std_devs - == unp.std_devs(other))) + if is_not_vb: + return (np.all(self.nominal_values + == unp.nominal_values(other)) + and np.all(self.std_devs + == unp.std_devs(other))) + else: + raise ValueError("Comparison numpy.ndarray or uncertainties.core.Variable \ + is not supported for Map with VarMultiDimBinning") if isinstance(other, Map): if (self.full_comparison or other.full_comparison @@ -2001,6 +2163,7 @@ def __eq__(self, other): return recursiveEquality(self.hashable_state, other.hashable_state) return self.hash == other.hash + type_error(other) @@ -2013,10 +2176,11 @@ def log(self): log_map : Map """ + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "log(%s)" % self.name, #'tex': r"\ln\left( %s \right)" % self.tex, - 'hist': unp.log(self.hist) + 'hist': unp.log(self.hist) if is_not_vb else [unp.log(hi) for hi in self.hist] } return state_updates @@ -2029,32 +2193,40 @@ def log10(self): log10_map : Map """ + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "log10(%s)" % self.name, #'tex': r"\log_{10}\left( %s \right)" % self.tex, - 'hist': unp.log10(self.hist) + 'hist': unp.log10(self.hist) if is_not_vb else [unp.log10(hi) for hi in self.hist] } return state_updates @_new_obj def __mul__(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "%s * %s" % (other, self.name), #'tex': r"%s \cdot %s" % (other, self.tex), - 'hist': self.hist * other + 'hist': self.hist * other if is_not_vb else \ + [hi * other for hi in self.hist], } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "array * %s" % self.name, - #'tex': r"X \cdot %s" % self.tex, - 'hist': self.hist * other, - } + if is_not_vb: + state_updates = { + #'name': "array * %s" % self.name, + #'tex': r"X \cdot %s" % self.tex, + 'hist': self.hist * other, + } + else: + raise ValueError("Multiplication by numpy.ndarray is not \ + supported for Map with VarMultiDimBinning") elif isinstance(other, Map): state_updates = { #'name': "%s * %s" % (self.name, other.name), #'tex': r"%s \cdot %s" % (self.tex, other.tex), - 'hist': self.hist * other.hist, + 'hist': self.hist * other.hist if is_not_vb else \ + [hi * other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), } @@ -2067,33 +2239,41 @@ def __ne__(self, other): @_new_obj def __neg__(self): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "-%s" % self.name, #'tex': r"-%s" % self.tex, - 'hist': -self.hist, + 'hist': -self.hist if is_not_vb else [-hi for hi in self.hist], } return state_updates @_new_obj def __pow__(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "%s**%s" % (self.name, other), #'tex': "%s^{%s}" % (self.tex, other), - 'hist': unp.pow(self.hist, other) + 'hist': unp.pow(self.hist, other) if is_not_vb else \ + [unp.pow(hi, other) for hi in self.hist], } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "%s**(array)" % self.name, - #'tex': r"%s^{X}" % self.tex, - 'hist': unp.pow(self.hist, other), - } + if is_not_vb: + state_updates = { + #'name': "%s**(array)" % self.name, + #'tex': r"%s^{X}" % self.tex, + 'hist': unp.pow(self.hist, other), + } + else: + raise ValueError("Power operation using numpy.ndarray is \ + not supported for Map with VarMultiDimBinning") elif isinstance(other, Map): state_updates = { #'name': "%s**(%s)" % (self.name, # strip_outer_parens(other.name)), #'tex': r"%s^{%s}" % (self.tex, strip_outer_parens(other.tex)), - 'hist': unp.pow(self.hist, other.hist), + 'hist': unp.pow(self.hist, other.hist) if is_not_vb else \ + [unp.pow(hi, other_hi) for hi, other_hi in zip(self.hist, other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), } @@ -2111,18 +2291,24 @@ def __rdiv__(self, other): @_new_obj def __rdiv(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s / %s)" % (other, self.name), #'tex': "{(%s / %s)}" % (other, self.tex), - 'hist': other / self.hist, + 'hist': other / self.hist if is_not_vb else \ + [other / hi for hi in self.hist], } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "array / %s" % self.name, - #'tex': "{(X / %s)}" % self.tex, - 'hist': other / self.hist, - } + if is_not_vb: + state_updates = { + #'name': "array / %s" % self.name, + #'tex': "{(X / %s)}" % self.tex, + 'hist': other / self.hist, + } + else: + raise ValueError("Division of numpy.ndarray by Map with \ + VarMultiDimBinning is not supported") else: type_error(other) return state_updates @@ -2137,18 +2323,24 @@ def __rsub__(self, other): @_new_obj def __rsub(self, other): + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s - %s)" % (other, self.name), #'tex': "{(%s - %s)}" % (other, self.tex), - 'hist': other - self.hist, + 'hist': other - self.hist if is_not_vb else \ + [other - hi for hi in self.hist], } elif isinstance(other, np.ndarray): - state_updates = { - #'name': "(array - %s)" % self.name, - #'tex': "{(X - %s)}" % self.tex, - 'hist': other - self.hist, - } + if is_not_vb: + state_updates = { + #'name': "(array - %s)" % self.name, + #'tex': "{(X - %s)}" % self.tex, + 'hist': other - self.hist, + } + else: + raise ValueError("Substarction of numpy.ndarray by Map \ + with VarMultiDimBinning is not supported") else: type_error(other) return state_updates @@ -2162,44 +2354,24 @@ def sqrt(self): sqrt_map : Map """ + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "sqrt(%s)" % self.name, #'tex': r"\sqrt{%s}" % self.tex, #'hist': np.asarray(unp.sqrt(self.hist), dtype='float'), - 'hist': unp.sqrt(self.hist), + 'hist': unp.sqrt(self.hist) if is_not_vb else [unp.sqrt(hi) for hi in self.hist], } return state_updates - @_new_obj - def __sub__(self, other): - if np.isscalar(other) or type(other) is uncertainties.core.Variable: - state_updates = { - #'name': "(%s - %s)" % (self.name, other), - #'tex': "{(%s - %s)}" % (self.tex, other), - 'hist': self.hist - other, - } - elif isinstance(other, np.ndarray): - state_updates = { - #'name': "(%s - array)" % self.name, - #'tex': "{(%s - X)}" % self.tex, - 'hist': self.hist - other, - } - elif isinstance(other, Map): - state_updates = { - #'name': "%s - %s" % (self.name, other.name), - #'tex': "{(%s - %s)}" % (self.tex, other.tex), - 'hist': self.hist - other.hist, - 'full_comparison': (self.full_comparison or - other.full_comparison), - } - else: - type_error(other) - return state_updates - def allclose(self, other): '''Check if this map and another have the same (within machine precision) bin counts''' self.assert_compat(other) - return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW) + is_not_vb = not isinstance(self.binning, VarMultiDimBinning) + if is_not_vb: + return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW) + else: + return np.all([np.allclose(nv, other_nv, **ALLCLOSE_KW) \ + for nv, other_nv in zip(self.nominal_values, other.nominal_values)]) # TODO: instantiate individual maps from dicts if passed as such, so user diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index 311e21ee9..abbdd2443 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -27,7 +27,7 @@ from pisa.core.param import ParamSet, DerivedParam from pisa.core.stage import Stage from pisa.core.container import ContainerSet -from pisa.core.binning import MultiDimBinning +from pisa.core.binning import MultiDimBinning, VarMultiDimBinning from pisa.utils.config_parser import PISAConfigParser, parse_pipeline_config from pisa.utils.fileio import mkdir from pisa.utils.hash import hash_obj @@ -326,7 +326,7 @@ def get_outputs(self, output_binning=None, output_key=None): output_binning = self.output_binning output_key = self.output_key else: - assert(isinstance(output_binning, MultiDimBinning)) + assert(isinstance(output_binning, (MultiDimBinning,VarMultiDimBinning))) assert output_binning is not None From 6ef11c3b9678f5cfa3279ffa4c46c28845e4fc4b Mon Sep 17 00:00:00 2001 From: Maria Date: Wed, 30 Oct 2024 12:44:24 -0500 Subject: [PATCH 02/10] config parser update to include variable binning and added more methods to VarMultiDimBinnig --- pisa/core/binning.py | 95 ++++++++++++++++++------------------- pisa/utils/config_parser.py | 27 ++++++++++- 2 files changed, 72 insertions(+), 50 deletions(-) diff --git a/pisa/core/binning.py b/pisa/core/binning.py index bfc5e2330..b7fe7f302 100644 --- a/pisa/core/binning.py +++ b/pisa/core/binning.py @@ -3123,6 +3123,7 @@ def __init__(self, event_species, name=None, mask=None): self._selections = None self._shape = None self._hashable_state = None + self._hash = None self.mask = None # not implemented, here so Map does not complain @@ -3197,6 +3198,16 @@ def hashable_state(self): return self._hashable_state + @property + def hash(self): + """Unique hash value for this object""" + if self._hash is None: + self._hash = hash_obj(self.hashable_state) + return self._hash + + def __hash__(self): + return self.hash + def assert_array_fits(self, array): """Check if a @@ -3213,6 +3224,33 @@ def assert_array_fits(self, array): % (array_el.shape, sp) ) + def assert_compat(self, other): + """Check if a (possibly different) binning can map onto the defined + binning. Allows for simple re-binning schemes (but no interpolation). + + Parameters + ---------- + other : Binning or container with attribute "binning" + + Returns + ------- + compat : bool + + """ + if not isinstance(other, VarMultiDimBinning): + for val in other.__dict__.values(): + if isinstance(val, VarMultiDimBinning): + other = val + break + assert isinstance(other, VarMultiDimBinning), str(type(other)) + if other == self: + return True + for my_sp, other_sp in zip(self, other): + if not (my_sp.binning.assert_compat(other_sp.binning) + and my_sp.selection == other_sp.selection): + return False + return True + def __repr__(self): previous_precision = np.get_printoptions()['precision'] np.set_printoptions(precision=18) @@ -3234,6 +3272,14 @@ def __mul__(self, other): other = VarMultiDimBinning(other) return VarMultiDimBinning(chain(self, other)) + def __eq__(self, other): + if not isinstance(other, VarMultiDimBinning): + return False + return recursiveEquality(self.hashable_state, other.hashable_state) + + def __ne__(self, other): + return not self.__eq__(other) + def __getitem__(self, index): """Return a new EventSpecie(s), sub-selected by `index`. @@ -3280,9 +3326,6 @@ def __getitem__(self, index): raise ValueError('Indices must be monotonically' ' increasing and adjacent.') new_species = [] -# new_names = [] -# new_selections = [] -# new_binnings = [] for sp_index in index: if sp_index < -mylen or sp_index >= mylen: raise ValueError( @@ -3291,57 +3334,11 @@ def __getitem__(self, index): %(self.name, bin_index, -mylen, mylen-1) ) new_species.append(deepcopy(self._species[sp_index])) -# new_names.append(self._names[sp_index]) -# new_selections.append(self._selections[sp_index]) -# new_binnings.append(self._binnings[sp_index]) else: raise TypeError('Unhandled index type %s' %type(orig_index)) return new_species - -# def __repr__(): # TODO - -# @property -# def midpoints(self): -# """Return a list of the contained dimensions' midpoints""" -# return [d.midpoints for d in self] - -# @property -# def weighted_centers(self): -# """Return a list of the contained dimensions' weighted_centers (e.g. -# equidistant from bin edges on logarithmic scale, if the binning is -# logarithmic; otherwise linear). Access `midpoints` attribute for -# always-linear alternative.""" -# return [d.weighted_centers for d in self] - -# @property -# def num_bins(self): -# """ -# Return a list of the contained dimensions' num_bins. -# Note that this does not accpunt for any bin mask (since it is computed per dimension) -# """ -# return [d.num_bins for d in self] - -# @property -# def tot_num_bins(self): -# """ -# Return total number of bins. -# If a bin mask is used, this will only count bins that are not masked off -# """ -# if self.mask is None : -# return np.product(self.shape) -# else : -# return np.sum(self.mask.astype(int)) - -# @property -# def units(self): -# """list : Return a list of the contained dimensions' units""" -# return [d.units for d in self] - -# def index(self, dim, use_basenames=False): - - def test_OneDimBinning(): """Unit tests for OneDimBinning class""" diff --git a/pisa/utils/config_parser.py b/pisa/utils/config_parser.py index a8daa7a73..1ef2b8db8 100644 --- a/pisa/utils/config_parser.py +++ b/pisa/utils/config_parser.py @@ -590,7 +590,7 @@ def parse_pipeline_config(config): """ # Note: imports placed here to avoid circular imports - from pisa.core.binning import MultiDimBinning, OneDimBinning + from pisa.core.binning import MultiDimBinning, OneDimBinning, VarMultiDimBinning, EventSpecie from pisa.core.param import ParamSelector, DerivedParam if isinstance(config, str): @@ -660,7 +660,32 @@ def parse_pipeline_config(config): mask = eval(mask) # Create the binning object binning_dict[binning] = MultiDimBinning(bins, name=binning, mask=mask) + # Now read the variable binning definitions + elif name.endswith('.specie_names'): + specie_names = split(config.get('binning', name)) + binning, _ = split(name, sep='.') + species = [] + for specie_name in specie_names: + try: + sel = config.get('binning', binning + '.' + specie_name + '.selection') + bin_def = config.get('binning', binning + '.' + specie_name + '.binning') + except: + logging.error( + "Failed to find definition of '%s'" + " variable binning entry.", + binning + ) + if not bin_def in binning_dict: + logging.error( + "Failed to find definition of '%s'" + " binning used in '%s' variable binning entry. Variable binnings" + "should be defined after MultiDimBinnings used in their definition.", + binning + ) + species.append(EventSpecie(name=specie_name, selection=sel, + binning=binning_dict[bin_def])) + binning_dict[binning] = VarMultiDimBinning(name=binning, event_species=species) stage_dicts = OrderedDict() From 6b3ddaa4627d1efdc9f4caf737d0620e62b95224 Mon Sep 17 00:00:00 2001 From: Maria Date: Wed, 30 Oct 2024 12:57:27 -0500 Subject: [PATCH 03/10] rest of the Variable binning implementation --- pisa/core/container.py | 83 ++++++++++--- pisa/core/map.py | 16 ++- pisa/core/pipeline.py | 4 +- pisa/stages/utils/hist.py | 255 ++++++++++++++++++++++++++------------ 4 files changed, 252 insertions(+), 106 deletions(-) diff --git a/pisa/core/container.py b/pisa/core/container.py index f1802ca84..24adb575a 100644 --- a/pisa/core/container.py +++ b/pisa/core/container.py @@ -302,9 +302,15 @@ def representation(self, representation): key = hash(representation) if not key in self.representation_keys: self._representations[key] = representation - if isinstance(representation, (MultiDimBinning,VarMultiDimBinning)): + if isinstance(representation, MultiDimBinning): for name in representation.names: self.validity[name][key] = True + elif isinstance(representation, VarMultiDimBinning): +# self.representation = 'events' + for bn in representation.binnings: + for bn_name in bn.names: +# print(bn_name, key) + self.validity[bn_name][key] = True elif isinstance(representation, str): if representation not in self.array_representations: raise ValueError(f"Unknown representation '{representation}'") @@ -418,22 +424,43 @@ def __add_data(self, key, data): assert isinstance(binning, (MultiDimBinning,VarMultiDimBinning)) assert hash(self.representation) == hash(binning) - - is_flat = array.shape[0] == binning.size - - if is_flat: - flat_array = array - else: - # first dimesnions must match - assert array.shape[:binning.num_dims] == binning.shape - - - if array.ndim > binning.num_dims: - flat_shape = (binning.size, -1) + if isinstance(binning, MultiDimBinning): + is_flat = array.shape[0] == binning.size + + if is_flat: + flat_array = array else: - flat_shape = (binning.size, ) - flat_array = array.reshape(flat_shape) - self.current_data[key] = flat_array + # first dimesnions must match + assert array.shape[:binning.num_dims] == binning.shape + + + if array.ndim > binning.num_dims: + flat_shape = (binning.size, -1) + else: + flat_shape = (binning.size, ) + flat_array = array.reshape(flat_shape) + self.current_data[key] = flat_array + elif isinstance(binning, VarMultiDimBinning): + # TODO: deal with duplicate code + arrays = [] + for arr, binn in zip(array,binning): + arr_binning = binn.binning + is_flat = arr.shape[0] == arr_binning.size + + if is_flat: + flat_array = arr + else: + # first dimesnions must match + assert arr.shape[:arr_binning.num_dims] == arr_binning.shape + + + if array.ndim > arr_binning.num_dims: + flat_shape = (arr_binning.size, -1) + else: + flat_shape = (arr_binning.size, ) + flat_array = arr.reshape(flat_shape) + arrays.append(flat_array) + self.current_data[key] = arrays else: raise TypeError('unknown dataformat') @@ -474,11 +501,26 @@ def get_hist(self, key): """Return reshaped data as normal n-dimensional histogram""" assert self.is_map, 'Cannot retrieve hists from non-map data' - # retrieve in case needs translation - self[key] + # retrieve in case needs translation (except VarMultiDimBinning) +# print(key, self.keys) + if not isinstance(self.representation, VarMultiDimBinning): + self[key] binning = self.representation data = self[key] + print(binning, data) + + if isinstance(binning, VarMultiDimBinning): + assert len(binning.species) == len(data) + reshaped_data = [] + for d, binn in zip(data,binning.binnings): + if d.ndim > binn.num_dims: + full_shape = list(binn.shape) + [-1] + else: + full_shape = list(binn.shape) + reshaped_data.append(d.reshape(full_shape)) + return reshaped_data, binning + if data.ndim > binning.num_dims: full_shape = list(binning.shape) + [-1] else: @@ -493,7 +535,10 @@ def get_map(self, key, error=None): error_hist = np.abs(self.get_hist(error)[0]) else: error_hist = None - assert hist.ndim == binning.num_dims + if isinstance(binning, VarMultiDimBinning): + assert len(binning.species) == len(hist) + else: + assert hist.ndim == binning.num_dims return Map(name=self.name, hist=hist, error_hist=error_hist, binning=binning) def __iter__(self): diff --git a/pisa/core/map.py b/pisa/core/map.py index fa7af4089..a18934066 100755 --- a/pisa/core/map.py +++ b/pisa/core/map.py @@ -500,10 +500,16 @@ def set_errors(self, error_hist): ) return self.assert_compat(error_hist) - super().__setattr__( - '_hist', - unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist)) - ) + if not isinstance(self.binning, VarMultiDimBinning): + super().__setattr__( + '_hist', + unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist)) + ) + else: + super().__setattr__( + '_hist', [unp.uarray(nv, np.ascontiguousarray(ehi)) \ + for nv, ehi in zip(self.nominal_values,error_hist)] + ) # TODO: make this return an OrderedDict to organize all of the returned # objects @@ -1338,7 +1344,7 @@ def from_json(cls, resource): def assert_compat(self, other): if np.isscalar(other) or type(other) is uncertainties.core.Variable: return - elif isinstance(other, np.ndarray): + elif isinstance(other, (np.ndarray, list)): self.binning.assert_array_fits(other) elif isinstance(other, Map): self.binning.assert_compat(other.binning) diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index abbdd2443..d68a364a0 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -330,7 +330,9 @@ def get_outputs(self, output_binning=None, output_key=None): assert output_binning is not None - self.data.representation = output_binning + # TODO: figure out how this is messing up variable binning!!! +# self.data.representation = output_binning +# print(self.data.keys) if isinstance(output_key, tuple): assert len(output_key) == 2 diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py index de8baafb6..bb6aa5585 100644 --- a/pisa/stages/utils/hist.py +++ b/pisa/stages/utils/hist.py @@ -7,9 +7,10 @@ from pisa.core.stage import Stage from pisa.core.translation import histogram -from pisa.core.binning import MultiDimBinning, OneDimBinning +from pisa.core.binning import MultiDimBinning, OneDimBinning, VarMultiDimBinning, EventSpecie from pisa.utils.profiler import profile from pisa.utils.log import logging +import re class hist(Stage): # pylint: disable=invalid-name @@ -37,17 +38,30 @@ def __init__( assert self.calc_mode is not None assert self.apply_mode is not None self.regularized_apply_mode = None + self.variable_binning = None self.apply_unc_weights = apply_unc_weights self.unweighted = unweighted def setup_function(self): - assert isinstance(self.apply_mode, MultiDimBinning), ( + assert isinstance(self.apply_mode, (MultiDimBinning, VarMultiDimBinning)), ( "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode ) +# for container in self.data: +# print('initial', container.keys) + + self.variable_binning = False + if isinstance(self.apply_mode, VarMultiDimBinning): + self.variable_binning = True + if isinstance(self.calc_mode, MultiDimBinning): + if self.variable_binning: + raise NotImplementedError( + "MultiDimBinning to VarMultiDimBinning conversion is not implemented!" + ) + # The two binning must be exclusive assert len(set(self.calc_mode.names) & set(self.apply_mode.names)) == 0 @@ -63,54 +77,108 @@ def setup_function(self): self.data.representation = self.calc_mode container["hist_transform"] = transform + elif isinstance(self.calc_mode, VarMultiDimBinning): + raise NotImplementedError( + "Using VarMultiDimBinning as calc_mode is not implemented!" + ) +# assert len(set(self.calc_mode.names) & set(self.apply_mode.names)) == 0 +# for calc_mode_sp, apply_mode_sp in zip(self.calc_mode, self.apply_mode): +# assert len(set(calc_mode_sp.binning.names) & set(apply_mode_sp.binning.names)) == 0 + # TODO + elif self.calc_mode == "events": # For dimensions where the binning is irregular, we pre-compute the # index that each sample falls into and then bin regularly in the index. # For dimensions that are logarithmic, we add a linear binning in # the logarithm. - dimensions = [] - for dim in self.apply_mode: - if dim.is_irregular: - # create a new axis with digitized variable - varname = dim.name + "__" + self.apply_mode.name + "_idx" - new_dim = OneDimBinning( - varname, domain=[0, dim.num_bins], num_bins=dim.num_bins - ) - dimensions.append(new_dim) - for container in self.data: - container.representation = "events" - x = container[dim.name] * dim.units - # Compute the bin index each sample would fall into, and - # shift by -1 such that samples below the binning range - # get assigned the index -1. - x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1 - # To be consistent with numpy histogramming, we need to - # shift those values that are exactly at the uppermost edge - # down one index such that they are included in the highest - # bin instead of being treated as an outlier. - on_edge = x == dim.bin_edges[-1] - x_idx[on_edge] -= 1 - container[varname] = x_idx - elif dim.is_log: - # We don't compute the log of the variable just yet, this - # will be done later during `apply_function` using the - # representation mechanism. - new_dim = OneDimBinning( - dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins - ) - dimensions.append(new_dim) - else: - dimensions.append(dim) - self.regularized_apply_mode = MultiDimBinning(dimensions) - logging.debug( - "Using regularized binning:\n" + str(self.regularized_apply_mode) - ) + if not self.variable_binning: + dimensions = [] + for dim in self.apply_mode: + if dim.is_irregular: + # create a new axis with digitized variable + varname = dim.name + "__" + self.apply_mode.name + "_idx" + new_dim = OneDimBinning( + varname, domain=[0, dim.num_bins], num_bins=dim.num_bins + ) + dimensions.append(new_dim) + for container in self.data: + container.representation = "events" + x = container[dim.name] * dim.units + # Compute the bin index each sample would fall into, and + # shift by -1 such that samples below the binning range + # get assigned the index -1. + x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1 + # To be consistent with numpy histogramming, we need to + # shift those values that are exactly at the uppermost edge + # down one index such that they are included in the highest + # bin instead of being treated as an outlier. + on_edge = x == dim.bin_edges[-1] + x_idx[on_edge] -= 1 + container[varname] = x_idx + elif dim.is_log: + # We don't compute the log of the variable just yet, this + # will be done later during `apply_function` using the + # representation mechanism. + new_dim = OneDimBinning( + dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins + ) + dimensions.append(new_dim) + else: + dimensions.append(dim) + # simplify the code by making it a VarMultiDimBinning ... + # self.regularized_apply_mode = MultiDimBinning(dimensions) + self.regularized_apply_mode = VarMultiDimBinning([EventSpecie( + binning=MultiDimBinning(dimensions))]) + logging.debug( + "Using regularized binning:\n" + str(self.regularized_apply_mode) + ) + else: + # TODO: deal with repeat code + print('calculating using var binning!!!') + species = [] + for specie in self.apply_mode.species: + dimensions = [] + for dim in specie.binning: + if dim.is_irregular: + varname = dim.name + "__" + specie.name + "_" + "_idx" + print(varname) + new_dim = OneDimBinning( + varname, domain=[0, dim.num_bins], num_bins=dim.num_bins + ) + dimensions.append(new_dim) + for container in self.data: + container.representation = "events" + x = container[dim.name] * dim.units + x_idx = np.searchsorted(dim.bin_edges, x, side="right") - 1 + on_edge = x == dim.bin_edges[-1] + x_idx[on_edge] -= 1 + container[varname] = x_idx + elif dim.is_log: + new_dim = OneDimBinning( + dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins + ) + dimensions.append(new_dim) + else: + dimensions.append(dim) + species.append(EventSpecie(name=specie.name, selection=specie.selection, + binning=MultiDimBinning(dimensions))) + self.regularized_apply_mode = VarMultiDimBinning(species) + logging.debug( + "Using regularized binning:\n" + str(self.regularized_apply_mode) + ) + else: raise ValueError(f"unknown calc mode: {self.calc_mode}") +# for container in self.data: +# print('setup_final', container.keys) + @profile def apply_function(self): +# for container in self.data: +# print('apply_initial', container.keys) + if isinstance(self.calc_mode, MultiDimBinning): if self.unweighted: @@ -144,55 +212,80 @@ def apply_function(self): elif self.calc_mode == "events": for container in self.data: +# print(container.keys) container.representation = self.calc_mode - sample = [] - dims_log = [d.is_log for d in self.apply_mode] - dims_ire = [d.is_irregular for d in self.apply_mode] - for dim, is_log, is_ire in zip( - self.regularized_apply_mode, dims_log, dims_ire - ): - if is_log and not is_ire: - container.representation = "log_events" - sample.append(container[dim.name]) - else: - container.representation = "events" - sample.append(container[dim.name]) - - if self.unweighted: - if "astro_weights" in container.keys: - weights = np.ones_like( - container["weights"] + container["astro_weights"] + print(container.keys) + hists = [] + errors = [] + bin_unc2s = [] + for specie, reg_specie in zip(self.apply_mode.species, + self.regularized_apply_mode): + selection = specie.selection + if selection is None: + sel_mask = np.ones_like(container["weights"]) + for var_name in container.keys: #TODO add check that masks don't overlap + # using word boundary \b to replace whole words only + print(type(var_name), var_name) + selection = re.sub( + r'\b{}\b'.format(var_name), + 'container["%s"]' % (var_name), + selection ) + print(selection) + sel_mask = eval(selection) # pylint: disable=eval-used + + sample = [] + dims_log = [d.is_log for d in specie.binning] + dims_ire = [d.is_irregular for d in specie.binning] + for dim, is_log, is_ire in zip( + reg_specie.binning, dims_log, dims_ire + ): + + if is_log and not is_ire: + container.representation = "log_events" + sample.append(container[dim.name][sel_mask]) + else: + container.representation = "events" + sample.append(container[dim.name][sel_mask]) + + if self.unweighted: + if "astro_weights" in container.keys: + weights = np.ones_like( + container["weights"][sel_mask] + container["astro_weights"][sel_mask] + ) + else: + weights = np.ones_like(container["weights"][sel_mask]) else: - weights = np.ones_like(container["weights"]) - else: - if "astro_weights" in container.keys: - weights = container["weights"] + container["astro_weights"] + if "astro_weights" in container.keys: + weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask] + else: + weights = container["weights"][sel_mask] + if self.apply_unc_weights: + unc_weights = container["unc_weights"][sel_mask] else: - weights = container["weights"] - if self.apply_unc_weights: - unc_weights = container["unc_weights"] - else: - unc_weights = np.ones(weights.shape) + unc_weights = np.ones(weights.shape) - # The hist is now computed using a binning that is completely linear - # and regular - hist = histogram( - sample, - unc_weights*weights, - self.regularized_apply_mode, - averaged=False - ) + # The hist is now computed using a binning that is completely linear + # and regular + hist = histogram( + sample, + (unc_weights*weights), + MultiDimBinning(reg_specie.binning), + averaged=False + ) +# print(hist) + hists.append(hist) - if self.error_method == "sumw2": - sumw2 = histogram(sample, np.square(unc_weights*weights), - self.regularized_apply_mode, averaged=False) - bin_unc2 = histogram(sample, np.square(unc_weights)*weights, - self.regularized_apply_mode, averaged=False) + if self.error_method == "sumw2": + errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights), + reg_specie.binning, averaged=False)) ) + bin_unc2s.append(histogram(sample, np.square(unc_weights)*weights, + reg_specie.binning, averaged=False)) container.representation = self.apply_mode - container["weights"] = hist + container["weights"] = self.apply_mode, hists + print(container) if self.error_method == "sumw2": - container["errors"] = np.sqrt(sumw2) - container["bin_unc2"] = bin_unc2 + container["errors"] = self.apply_mode, errors + container["bin_unc2"] = self.apply_mode, bin_unc2s From a5984b5c515c3523a365909dd771b0ce23a44927 Mon Sep 17 00:00:00 2001 From: Maria Date: Sun, 3 Nov 2024 15:58:30 -0600 Subject: [PATCH 04/10] add proper tests for variable binning and EventSpecie; also update __eq__ and __repr__ methods --- pisa/core/binning.py | 195 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 179 insertions(+), 16 deletions(-) diff --git a/pisa/core/binning.py b/pisa/core/binning.py index b7fe7f302..20f10e554 100644 --- a/pisa/core/binning.py +++ b/pisa/core/binning.py @@ -3056,12 +3056,36 @@ class EventSpecie(object): """ # pylint: enable=line-too-long def __init__(self, binning, name=None, selection=None): - self.name = name - self.selection = selection # TODO: format? - self.binning = binning # sould be MultiDimBinning (maybe OneDim too?) - # TODO: add checks for proper format, type + + if not isinstance(binning, MultiDimBinning): + try: + binning = MultiDimBinning(binning) + except: + TypeError('EventSpecie accepts MultiDimBinning as \ + binning. Unable to initialize MultiDimBinning using \ + provided binning: %s'%(binning)) + + self._name = name + self._selection = selection # TODO: add check for selection format? + self._binning = binning + self._serializable_state = None + @property + def name(self): + """Name of the specie""" + return self._name + + @property + def selection(self): + """Specie selection string""" + return self._selection + + @property + def binning(self): + """Specie binning""" + return self._binning + @property def serializable_state(self): """OrderedDict containing savable state attributes""" @@ -3069,10 +3093,8 @@ def serializable_state(self): state = OrderedDict() state['name'] = self.name state['selection'] = self.selection - state['binning'] = self.binning.serializable_state + state['binning'] = 'MultiDimBinning(%s)'%self.binning.serializable_state self._serializable_state = state - # Since the tex property can be modified, must set every time this - # property is called return self._serializable_state @property @@ -3084,13 +3106,23 @@ def __repr__(self): previous_precision = np.get_printoptions()['precision'] np.set_printoptions(precision=18) try: - argstrs = [('%s=%r' %item) for item in - self.serializable_state.items()] + argstrs = [('%s=%r'%(k,self.serializable_state[k])) for k in + ('name','selection')] + argstrs.append('%s=%s'%('binning',repr(self.binning))) r = '%s(%s)' %(self.__class__.__name__, ',\n '.join(argstrs)) finally: np.set_printoptions(precision=previous_precision) return r + def __eq__(self, other): + if not isinstance(other, EventSpecie): + return False + if (self.name == other.name + and self.selection == other.selection + and self.binning == other.binning): + return True + return False + class VarMultiDimBinning(object): # pylint: disable=line-too-long r""" @@ -3136,7 +3168,7 @@ def shape(self): @property def name(self): - """Name of the species""" + """Name of the VarMultiDimBinning object""" return self._name @property @@ -3188,9 +3220,6 @@ def hashable_state(self): if self._hashable_state is None: state = OrderedDict() - # TODO: Shouldn't order matter? - #state['dimensions'] = [self[name]._hashable_state - # for name in sorted(self.names)] state['event_species'] = [d.hashable_state for d in self.species] state['name'] = self.name @@ -3255,8 +3284,9 @@ def __repr__(self): previous_precision = np.get_printoptions()['precision'] np.set_printoptions(precision=18) try: - argstrs = [('%s=%r' %item) for item in - self.serializable_state.items()] + species_argstrs = ['%r'%sp for sp in self.species] + argstrs = [('event_species=[%s]' %',\n '.join(species_argstrs))] + argstrs.append('name=%s' %self.name) r = '%s(%s)' %(self.__class__.__name__, ',\n '.join(argstrs)) finally: np.set_printoptions(precision=previous_precision) @@ -3275,7 +3305,35 @@ def __mul__(self, other): def __eq__(self, other): if not isinstance(other, VarMultiDimBinning): return False - return recursiveEquality(self.hashable_state, other.hashable_state) + # only comparing length of specie arrays since order can be different + if not len(self.shape)==len(other.shape): + return False + + # sort by selection in case names are not unique + comp_order_self = np.argsort(self.selections) + comp_order_other = np.argsort(other.selections) + for i,j in zip(comp_order_self, comp_order_other): + if self.species[i] != other.species[j]: + return False + # throw warning if species are the same but in different order + if np.any(comp_order_self != comp_order_other): + logging.warning( + 'Following VarMultiDimBinning objects are not equal because they ' + 'have different order of `event_species`:\n\n %r \n\nand\n\n %r', + self, other + ) + return False + # throw info in case binnings only differ by name + if self.name != other.name: + logging.info( + 'Following VarMultiDimBinning objects are not equal ' + 'only because of different names.' + 'Both binnings contain identical `event_species`:\n\n %r \n\nand\n\n %r', + self, other + ) + return False + + return True def __ne__(self, other): return not self.__eq__(other) @@ -3795,8 +3853,113 @@ def test_MultiDimBinning(): logging.info('<< PASS : test_MultiDimBinning >>') +def test_EventSpecie(): + + # needed so that eval(repr(ev_sp)) works + from numpy import array, float32, float64 # pylint: disable=unused-variable + + binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=8, + domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, num_bins=10, + domain=[-1, 0.]) + ]) + binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=12, + domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, num_bins=5, + domain=[-1, -0.2]), + dict(name='pid', is_lin=True, bin_edges=[0.0, 0.25, 0.55, 1.0], + bin_names=['cascades', 'mixed','tracks']) + ]) + + ev_sp1 = EventSpecie(binning=binning_2d, name='sp1_2d', selection='reco_rho36>200') + ev_sp2 = EventSpecie(binning=binning_3d, name='sp2_3d', selection='reco_rho36<=200') + ev_sp3 = EventSpecie(binning=binning_2d, name='sp1_2d', selection='reco_rho36<=200') + + name1 = ev_sp1.name + name2 = ev_sp2.name + name3 = ev_sp3.name + selection1 = ev_sp1.selection + selection2 = ev_sp2.selection + selection3 = ev_sp3.selection + binning1 = ev_sp1.binning + binning2 = ev_sp2.binning + binning3 = ev_sp3.binning + + assert name1 != name2 + assert name1 == name3 + assert selection1 != selection2 + assert selection2 == selection3 + assert binning1 != binning2 + assert binning1 == binning3 + + _ = binning1.hashable_state + + assert eval(repr(ev_sp1)) == ev_sp1 # pylint: disable=eval-used + + # TODO: saving to file + + logging.info('<< PASS : test_EventSpecie >>') + +def test_VarMultiDimBinning(): + + # needed so that eval(repr(vb)) works + from numpy import array, float32, float64 # pylint: disable=unused-variable + + binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=8, + domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, num_bins=10, + domain=[-1, 0.]) + ]) + binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, num_bins=12, + domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, num_bins=5, + domain=[-1, -0.2]), + dict(name='pid', is_lin=True, bin_edges=[0.0, 0.25, 0.55, 1.0], + bin_names=['cascades', 'mixed','tracks']) + ]) + + vb = VarMultiDimBinning([EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d), + EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d), + ]) + vb2 = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d), + EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d), + ]) + vb2a = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d), + EventSpecie(name='high_ndom', selection='n_hit_doms>20', binning=binning_3d), + ], + name='2a') + vb3 = VarMultiDimBinning([EventSpecie(name='low_ndom', selection='n_hit_doms<=20', binning=binning_2d), + EventSpecie(name='mid_ndom', selection='(n_hit_doms>20)&(n_hit_doms<=30)', + binning=binning_3d), + EventSpecie(name='high_ndom', selection='n_hit_doms>30', binning=binning_3d), + ]) + + + assert eval(repr(vb)) == vb # pylint: disable=eval-used + + assert vb != vb2 + assert vb2 != vb2a + assert vb2 != vb3 + assert vb2.assert_compat(vb2a) + assert vb[0] != vb[1] + + _ = vb.shape + _ = vb.name + _ = vb.species + _ = vb.names + _ = vb.binnings + _ = vb.selections + _ = vb.serializable_state + _ = vb.hashable_state + _ = hash(vb) + + # TODO: saving to file + + logging.info('<< PASS : test_VarMultiDimBinning >>') if __name__ == "__main__": set_verbosity(1) test_OneDimBinning() test_MultiDimBinning() + test_EventSpecie() + test_VarMultiDimBinning() From 246254d8d6fa902932b43cf39f64ec01923c43c7 Mon Sep 17 00:00:00 2001 From: Maria Date: Sun, 3 Nov 2024 16:59:29 -0600 Subject: [PATCH 05/10] add a flag to Map for using variable binning --- pisa/core/map.py | 125 ++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 72 deletions(-) diff --git a/pisa/core/map.py b/pisa/core/map.py index a18934066..fd8b3395e 100755 --- a/pisa/core/map.py +++ b/pisa/core/map.py @@ -296,7 +296,8 @@ class Map(object): """ _slots = ('name', 'hist', 'binning', 'hash', '_hash', 'tex', - 'full_comparison', 'parent_indexer', '_normalize_values') + 'full_comparison', 'parent_indexer', '_normalize_values', + 'variable_binning') _state_attrs = ('name', 'hist', 'binning', 'hash', 'tex', 'full_comparison') @@ -319,15 +320,15 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None, ' type %s' %(binning, type(binning))) self.parent_indexer = None -# if isinstance(binning, VarMultiDimBinning): -# self.varbinning = True -# else: -# self.varbinning = False + if isinstance(binning, VarMultiDimBinning): + self.variable_binning = True + else: + self.variable_binning = False # Do the work here to set read-only attributes super().__setattr__('_binning', binning) binning.assert_array_fits(hist) - if not isinstance(binning, VarMultiDimBinning): + if not self.variable_binning: super().__setattr__( '_hist', np.ascontiguousarray(hist) ) @@ -500,7 +501,7 @@ def set_errors(self, error_hist): ) return self.assert_compat(error_hist) - if not isinstance(self.binning, VarMultiDimBinning): + if not self.variable_binning: super().__setattr__( '_hist', unp.uarray(self.nominal_values, np.ascontiguousarray(error_hist)) @@ -1565,9 +1566,8 @@ def llh(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.llh(actual_values=self.hist, expected_values=expected_values) else: @@ -1600,9 +1600,8 @@ def mcllh_mean(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.mcllh_mean(actual_values=self.hist, expected_values=expected_values) else: @@ -1636,9 +1635,8 @@ def mcllh_eff(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.mcllh_eff(actual_values=self.hist, expected_values=expected_values) else: @@ -1671,9 +1669,8 @@ def conv_llh(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.conv_llh(actual_values=self.hist, expected_values=expected_values) else: @@ -1712,9 +1709,8 @@ def barlow_llh(self, expected_values, binned=False): elif isinstance(expected_values, Iterable): expected_values = [reduceToHist(x) for x in expected_values] - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.barlow_llh(actual_values=self.hist, expected_values=expected_values) else: @@ -1748,9 +1744,8 @@ def mod_chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.mod_chi2(actual_values=self.hist, expected_values=expected_values) else: @@ -1783,9 +1778,8 @@ def correct_chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.correct_chi2(actual_values=self.hist, expected_values=expected_values) else: @@ -1818,9 +1812,8 @@ def chi2(self, expected_values, binned=False): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if binned: - if is_not_vb: + if not self.variable_binning: return stats.chi2(actual_values=self.hist, expected_values=expected_values) else: @@ -1851,8 +1844,7 @@ def signed_sqrt_mod_chi2(self, expected_values): """ expected_values = reduceToHist(expected_values) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) - if is_not_vb: + if not self.variable_binning: return stats.chi2(actual_values=self.hist, expected_values=expected_values) return [stats.chi2(actual_values=hi, @@ -1877,12 +1869,13 @@ def generalized_poisson_llh(self, expected_values=None, empty_bins=None, binned= ''' + #TODO: implement for VarMultiDimBinning + if self.variable_binning: + raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning") + llh_per_bin = stats.generalized_poisson_llh(actual_values=self.hist, expected_values=expected_values, empty_bins=empty_bins) - #TODO: implement for VarMultiDimBinning - if isinstance(self.binning, VarMultiDimBinning): - raise ValueError("generalized_poisson_llh not implemented for Maps with VarMultiDimBinning") if binned: return llh_per_bin @@ -1967,7 +1960,7 @@ def hist(self): # Apply bin mask, if one exists # Set masked off elements to NaN (handling both cases where the hst is either a simple array, or has uncertainties) if self.binning.mask is not None : - if not isinstance(self.binning, VarMultiDimBinning): + if not self.variable_binning: hist[~self.binning.mask] = ufloat(np.NaN, np.NaN) if isinstance(self._hist[np.unravel_index(0, self._hist.shape)], uncertainties.core.Variable) else np.NaN #TODO Is there a better way to check if this is a uarray? else: raise ValueError("Masking for Maps with Variable binning is not implemented") @@ -1978,7 +1971,7 @@ def hist(self): @property def nominal_values(self): """numpy.ndarray : Bin values stripped of uncertainties""" - if not isinstance(self.binning, VarMultiDimBinning): + if not self.variable_binning: return unp.nominal_values(self.hist) else: return [unp.nominal_values(hi) for hi in self.hist] @@ -1986,7 +1979,7 @@ def nominal_values(self): @property def std_devs(self): """numpy.ndarray : Uncertainties (standard deviations) per bin""" - if not isinstance(self.binning, VarMultiDimBinning): + if not self.variable_binning: return unp.std_devs(self.hist) else: return [unp.std_devs(hi) for hi in self.hist] @@ -2010,11 +2003,10 @@ def full_comparison(self, value): @_new_obj def __abs__(self): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "|%s|" % (self.name,), #'tex': r"{\left| %s \right|}" % strip_outer_parens(self.tex), - 'hist': np.abs(self.hist) if is_not_vb else \ + 'hist': np.abs(self.hist) if not self.variable_binning else \ [np.abs(hi) for hi in self.hist] } return state_updates @@ -2022,16 +2014,15 @@ def __abs__(self): @_new_obj def __add__(self, other): """Add `other` to self""" - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s + %s)" % (self.name, other), #'tex': r"{(%s + %s)}" % (self.tex, other), - 'hist': self.hist + other if is_not_vb else \ + 'hist': self.hist + other if not self.variable_binning else \ [hi+other for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "(%s + array)" % self.name, #'tex': r"{(%s + X)}" % self.tex, @@ -2044,7 +2035,7 @@ def __add__(self, other): state_updates = { #'name': "(%s + %s)" % (self.name, other.name), #'tex': r"{(%s + %s)}" % (self.tex, other.tex), - 'hist': self.hist + other.hist if is_not_vb else \ + 'hist': self.hist + other.hist if not self.variable_binning else \ [hi+other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), @@ -2058,16 +2049,15 @@ def __add__(self, other): @_new_obj def __sub__(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s - %s)" % (self.name, other), #'tex': "{(%s - %s)}" % (self.tex, other), - 'hist': self.hist - other if is_not_vb else \ + 'hist': self.hist - other if not self.variable_binning else \ [hi-other for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "(%s - array)" % self.name, #'tex': "{(%s - X)}" % self.tex, @@ -2080,7 +2070,7 @@ def __sub__(self, other): state_updates = { #'name': "%s - %s" % (self.name, other.name), #'tex': "{(%s - %s)}" % (self.tex, other.tex), - 'hist': self.hist - other.hist if is_not_vb else \ + 'hist': self.hist - other.hist if not self.variable_binning else \ [hi-other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), @@ -2091,16 +2081,15 @@ def __sub__(self, other): @_new_obj def __div__(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s / %s)" % (self.name, other), #'tex': r"{(%s / %s)}" % (self.tex, other), - 'hist': self.hist / other if is_not_vb else \ + 'hist': self.hist / other if not self.variable_binning else \ [hi/other for hi in self.hist] } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "(%s / array)" % self.name, #'tex': r"{(%s / X)}" % self.tex, @@ -2113,7 +2102,7 @@ def __div__(self, other): state_updates = { #'name': "(%s / %s)" % (self.name, other.name), #'tex': r"{(%s / %s)}" % (self.tex, other.tex), - 'hist': self.hist / other.hist if is_not_vb else \ + 'hist': self.hist / other.hist if not self.variable_binning else \ [hi/other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), @@ -2145,16 +2134,15 @@ def __eq__(self, other): Otherwise, simply checks that the hashes are equal. """ - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other): - if is_not_vb: + if not self.variable_binning: return np.all(self.nominal_values == other) else: return np.all([np.all(nv==other) for nv in self.nominal_values]) if type(other) is uncertainties.core.Variable \ or isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: return (np.all(self.nominal_values == unp.nominal_values(other)) and np.all(self.std_devs @@ -2182,11 +2170,10 @@ def log(self): log_map : Map """ - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "log(%s)" % self.name, #'tex': r"\ln\left( %s \right)" % self.tex, - 'hist': unp.log(self.hist) if is_not_vb else [unp.log(hi) for hi in self.hist] + 'hist': unp.log(self.hist) if not self.variable_binning else [unp.log(hi) for hi in self.hist] } return state_updates @@ -2199,26 +2186,25 @@ def log10(self): log10_map : Map """ - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "log10(%s)" % self.name, #'tex': r"\log_{10}\left( %s \right)" % self.tex, - 'hist': unp.log10(self.hist) if is_not_vb else [unp.log10(hi) for hi in self.hist] + 'hist': unp.log10(self.hist) if not self.variable_binning else \ + [unp.log10(hi) for hi in self.hist] } return state_updates @_new_obj def __mul__(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "%s * %s" % (other, self.name), #'tex': r"%s \cdot %s" % (other, self.tex), - 'hist': self.hist * other if is_not_vb else \ + 'hist': self.hist * other if not self.variable_binning else \ [hi * other for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "array * %s" % self.name, #'tex': r"X \cdot %s" % self.tex, @@ -2231,7 +2217,7 @@ def __mul__(self, other): state_updates = { #'name': "%s * %s" % (self.name, other.name), #'tex': r"%s \cdot %s" % (self.tex, other.tex), - 'hist': self.hist * other.hist if is_not_vb else \ + 'hist': self.hist * other.hist if not self.variable_binning else \ [hi * other_hi for hi, other_hi in zip(self.hist,other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), @@ -2245,26 +2231,24 @@ def __ne__(self, other): @_new_obj def __neg__(self): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "-%s" % self.name, #'tex': r"-%s" % self.tex, - 'hist': -self.hist if is_not_vb else [-hi for hi in self.hist], + 'hist': -self.hist if not self.variable_binning else [-hi for hi in self.hist], } return state_updates @_new_obj def __pow__(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "%s**%s" % (self.name, other), #'tex': "%s^{%s}" % (self.tex, other), - 'hist': unp.pow(self.hist, other) if is_not_vb else \ + 'hist': unp.pow(self.hist, other) if not self.variable_binning else \ [unp.pow(hi, other) for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "%s**(array)" % self.name, #'tex': r"%s^{X}" % self.tex, @@ -2278,7 +2262,7 @@ def __pow__(self, other): #'name': "%s**(%s)" % (self.name, # strip_outer_parens(other.name)), #'tex': r"%s^{%s}" % (self.tex, strip_outer_parens(other.tex)), - 'hist': unp.pow(self.hist, other.hist) if is_not_vb else \ + 'hist': unp.pow(self.hist, other.hist) if not self.variable_binning else \ [unp.pow(hi, other_hi) for hi, other_hi in zip(self.hist, other.hist)], 'full_comparison': (self.full_comparison or other.full_comparison), @@ -2297,16 +2281,15 @@ def __rdiv__(self, other): @_new_obj def __rdiv(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s / %s)" % (other, self.name), #'tex': "{(%s / %s)}" % (other, self.tex), - 'hist': other / self.hist if is_not_vb else \ + 'hist': other / self.hist if not self.variable_binning else \ [other / hi for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "array / %s" % self.name, #'tex': "{(X / %s)}" % self.tex, @@ -2329,16 +2312,15 @@ def __rsub__(self, other): @_new_obj def __rsub(self, other): - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) if np.isscalar(other) or type(other) is uncertainties.core.Variable: state_updates = { #'name': "(%s - %s)" % (other, self.name), #'tex': "{(%s - %s)}" % (other, self.tex), - 'hist': other - self.hist if is_not_vb else \ + 'hist': other - self.hist if not self.variable_binning else \ [other - hi for hi in self.hist], } elif isinstance(other, np.ndarray): - if is_not_vb: + if not self.variable_binning: state_updates = { #'name': "(array - %s)" % self.name, #'tex': "{(X - %s)}" % self.tex, @@ -2360,20 +2342,19 @@ def sqrt(self): sqrt_map : Map """ - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) state_updates = { #'name': "sqrt(%s)" % self.name, #'tex': r"\sqrt{%s}" % self.tex, #'hist': np.asarray(unp.sqrt(self.hist), dtype='float'), - 'hist': unp.sqrt(self.hist) if is_not_vb else [unp.sqrt(hi) for hi in self.hist], + 'hist': unp.sqrt(self.hist) if not self.variable_binning else \ + [unp.sqrt(hi) for hi in self.hist], } return state_updates def allclose(self, other): '''Check if this map and another have the same (within machine precision) bin counts''' self.assert_compat(other) - is_not_vb = not isinstance(self.binning, VarMultiDimBinning) - if is_not_vb: + if not self.variable_binning: return np.allclose(self.nominal_values, other.nominal_values, **ALLCLOSE_KW) else: return np.all([np.allclose(nv, other_nv, **ALLCLOSE_KW) \ From 22d70683071f900ad0e0f2764597e6c514513814 Mon Sep 17 00:00:00 2001 From: Maria Date: Mon, 4 Nov 2024 02:59:19 -0600 Subject: [PATCH 06/10] added or flags to Map for methods where VarMultiDimBinning either does not make sense or support is not yet implemented; also extended test_Map to include tests with VarMultiDimBinning --- pisa/core/binning.py | 2 +- pisa/core/map.py | 172 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 160 insertions(+), 14 deletions(-) diff --git a/pisa/core/binning.py b/pisa/core/binning.py index 20f10e554..88e5e6b01 100644 --- a/pisa/core/binning.py +++ b/pisa/core/binning.py @@ -3093,7 +3093,7 @@ def serializable_state(self): state = OrderedDict() state['name'] = self.name state['selection'] = self.selection - state['binning'] = 'MultiDimBinning(%s)'%self.binning.serializable_state + state['binning'] = self.binning.serializable_state self._serializable_state = state return self._serializable_state diff --git a/pisa/core/map.py b/pisa/core/map.py index fd8b3395e..14237d52f 100755 --- a/pisa/core/map.py +++ b/pisa/core/map.py @@ -30,7 +30,7 @@ from uncertainties import unumpy as unp from pisa import ureg, HASH_SIGFIGS -from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning +from pisa.core.binning import OneDimBinning, MultiDimBinning, VarMultiDimBinning, EventSpecie from pisa.utils.comparisons import normQuant, recursiveEquality, ALLCLOSE_KW from pisa.utils.flavInt import NuFlavIntGroup from pisa.utils.hash import hash_obj @@ -315,6 +315,15 @@ def __init__(self, name, hist, binning, error_hist=None, hash=None, binning = MultiDimBinning(dimensions=binning) elif isinstance(binning, Mapping): binning = MultiDimBinning(**binning) + # TODO: figure out file read out for VarMultiDimBinning +# if not 'event_species' in binning.keys(): +# binning = MultiDimBinning(**binning) +# else: +# ev_species = [EventSpecie(**es_str) for es_str +# in binning['event_species']] +# print(binning['event_species']) +# binning = VarMultiDimBinning(event_species=ev_species, +# name=binning['name']) else: raise ValueError('Do not know what to do with `binning`=%s of' ' type %s' %(binning, type(binning))) @@ -345,9 +354,17 @@ def __repr__(self): np.set_printoptions(precision=18) try: state = self.serializable_state - state['hist'] = np.array_repr(state['hist']) + + if not self.variable_binning: + state['hist'] = np.array_repr(state['hist']) + else: + state['hist'] = [np.array_repr(h) for h in state['hist']] + if state['error_hist'] is not None: - state['error_hist'] = np.array_repr(state['error_hist']) + if not self.variable_binning: + state['error_hist'] = np.array_repr(state['error_hist']) + else: + state['error_hist'] = [np.array_repr(eh) for eh in state['error_hist']] argstrs = [('%s=%r' % item) for item in self.serializable_state.items()] r = '%s(%s)' % (self.__class__.__name__, ',\n '.join(argstrs)) @@ -361,7 +378,10 @@ def __str__(self): state = {a: getattr(self, a) for a in attrs} state['name'] = repr(state['name']) state['tex'] = repr(state['tex']) - state['hist'] = np.array_repr(state['hist']) + if not self.variable_binning: + state['hist'] = np.array_repr(state['hist']) + else: + state['hist'] = [np.array_repr(h) for h in state['hist']] argstrs = [('%s=%s' % (a, state[a])) for a in attrs] s = '%s(%s)' % (self.__class__.__name__, ',\n '.join(argstrs)) return s @@ -404,6 +424,9 @@ def item(self, *args): z : Standard Python scalar object """ + if self.variable_binning: + raise TypeError('item() method not supported for Map with VarMultiDimBinning') + return self.hist.item(*args) def slice(self, **kwargs): @@ -473,15 +496,24 @@ def slice(self, **kwargs): object's dimensionality. """ + if self.variable_binning: + raise TypeError('slice() method not supported for Map with VarMultiDimBinning') + return self[self.binning.indexer(**kwargs)] def set_poisson_errors(self): """Approximate poisson errors using sqrt(n).""" nom_values = self.nominal_values - super().__setattr__( - '_hist', - unp.uarray(nom_values, np.sqrt(nom_values)) - ) + if not self.variable_binning: + super().__setattr__( + '_hist', + unp.uarray(nom_values, np.sqrt(nom_values)) + ) + else: + super().__setattr__( + '_hist', + [unp.uarray(nv, np.sqrt(nv)) for nv in nom_values] + ) def set_errors(self, error_hist): """Manually define the error with an array the same shape as the @@ -538,6 +570,11 @@ def compare(self, ref): * 'infmatch' : bool, whether +inf (and separately -inf) entries match """ + + if self.variable_binning: + raise TypeError('compare() method is not implemented for ' + 'Map with VarMultiDimBinning') + assert isinstance(ref, Map) assert ref.binning == self.binning diff = self - ref @@ -705,6 +742,11 @@ def plot(self, symm=False, logz=False, vmin=None, vmax=None, backend=None, colorbar : :class:`matplotlib.colorbar.Colorbar` """ + + if self.variable_binning: + raise TypeError('plot() method is not implemented for ' + 'Map with VarMultiDimBinning') + import matplotlib as mpl import matplotlib.pyplot as plt from mpl_toolkits.axes_grid1 import make_axes_locatable @@ -949,6 +991,11 @@ def reorder_dimensions(self, order): Modify Map (and its binning) by combining adjacent bins """ + + if self.variable_binning: + raise TypeError('reorder_dimensions() method not supported for ' + 'Map with VarMultiDimBinning') + new_binning = self.binning.reorder_dimensions(order) orig_order = list(range(len(self.binning))) new_order = [self.binning.index(b, use_basenames=False) @@ -969,6 +1016,11 @@ def squeeze(self): Map with equivalent values but singleton dimensions removed """ + + if self.variable_binning: + raise TypeError('squeeze() method not supported for ' + 'Map with VarMultiDimBinning') + new_binning = self.binning.squeeze() new_hist = self.hist.squeeze() return {'hist': new_hist, 'binning': new_binning} @@ -976,9 +1028,13 @@ def squeeze(self): @_new_obj def round2int(self): binning = self.binning - nominal_values = np.rint(self.nominal_values) + nominal_values = self.nominal_values std_devs = self.std_devs - return {'hist': unp.uarray(nominal_values, std_devs)} + if not self.variable_binning: + return {'hist': unp.uarray(np.rint(nominal_values), std_devs)} + else: + return {'hist': [unp.uarray(np.rint(nv), sd) for nv,sd + in zip(nominal_values, std_devs)]} @_new_obj def sum(self, axis=None, keepdims=False): @@ -1008,6 +1064,10 @@ def sum(self, axis=None, keepdims=False): #TODO This function does't work if axis=None, since @_new_obj expects the output to be a map + if self.variable_binning: + raise TypeError('sum() method not supported for ' + 'Map with VarMultiDimBinning') + if axis is None: axis = self.binning.names if isinstance(axis, (string_types, int)): @@ -1045,6 +1105,11 @@ def project(self, axis, keepdims=False): projection : Map """ + + if self.variable_binning: + raise TypeError('project() method not supported for ' + 'Map with VarMultiDimBinning') + keep_index = self.binning.index(axis) sum_indices = list(range(len(self.binning.dims))) sum_indices.remove(keep_index) @@ -1076,6 +1141,10 @@ def rebin(self, new_binning): """ # TODO: put uncertainties in + if self.variable_binning: + raise TypeError('rebin() method not supported for ' + 'Map with VarMultiDimBinning') + assert self.binning.mask is None, "`rebin` function does not currenty support bin masking" new_hist = rebin(hist=self.hist, orig_binning=self.binning, @@ -1090,6 +1159,11 @@ def downsample(self, *args, **kwargs): details. """ + + if self.variable_binning: + raise TypeError('downsample() method not supported for ' + 'Map with VarMultiDimBinning') + new_binning = self.binning.downsample(*args, **kwargs) return self.rebin(new_binning) @@ -1121,6 +1195,11 @@ def fluctuate(self, method, random_state=None, jumpahead=None): .. [1] Bohm & Zech, "Statistics of weighted Poisson events and its applications" (2013), https://arxiv.org/abs/1309.1287 """ + + if self.variable_binning: + raise TypeError('fluctuate() method not yet implemented for ' + 'Map with VarMultiDimBinning (TODO)') + orig = method method = str(method).strip().lower().replace(' ', '') if not method in FLUCTUATE_METHODS: @@ -1234,17 +1313,25 @@ def fluctuate(self, method, random_state=None, jumpahead=None): @property def shape(self): """tuple : shape of the map, akin to `nump.ndarray.shape`""" - return self.hist.shape + # return self.hist.shape + # any reason to take shape from the hist and not binning for Map with MultiDimBinning? + return self.binning.shape @property def size(self): """int : total number of elements""" - return self.hist.size + if not self.variable_binning: + return self.hist.size + else: + return np.sum([h.size for h in self.hist]) @property def num_entries(self): """int : total number of weighted entries in all bins""" - return np.sum(valid_nominal_values(self.hist)) + if not self.variable_binning: + return np.sum(valid_nominal_values(self.hist)) + else: + return np.sum([np.sum(valid_nominal_values(h)) for h in self.hist]) @property def serializable_state(self): @@ -1316,6 +1403,10 @@ def to_json(self, filename, **kwargs): pisa.utils.jsons.to_json """ + if self.variable_binning: + raise TypeError('to_json() method not yet implemented for ' + 'Map with VarMultiDimBinning (TODO)') + jsons.to_json(self.serializable_state, filename=filename, **kwargs) @classmethod @@ -1365,6 +1456,10 @@ def iterbins(self): Map object containing one of each bin of this Map """ + if self.variable_binning: + raise TypeError('iterbins() method is not supported for ' + 'Map with VarMultiDimBinning') + for i in range(self.size): idx_coord = self.binning.index2coord(i) idx_view = tuple(slice(x, x+1) for x in idx_coord) @@ -1382,6 +1477,10 @@ def iterbins(self): # TODO : example! def itercoords(self): """Iterator that yields the coordinate of each bin in the map.""" + if self.variable_binning: + raise TypeError('itercoords() method is not supported for ' + 'Map with VarMultiDimBinning') + return self.binning.itercoords() def __hash__(self): @@ -1406,6 +1505,10 @@ def _slice_or_index(self, idx): hist should be). """ + if self.variable_binning: + raise TypeError('_slice_or_index() method is not supported for ' + 'Map with VarMultiDimBinning') + new_binning = self.binning[idx] new_map = Map(name=self.name, @@ -1473,6 +1576,9 @@ def split(self, dim, bin=None, use_basenames=False, pure_bin_names=False): copied into the new map(s). """ + if self.variable_binning: + raise TypeError('split() method is not supported for ' + 'Map with VarMultiDimBinning') dim_index = self.binning.index(dim, use_basenames=use_basenames) spliton_dim = self.binning.dims[dim_index] @@ -3500,6 +3606,46 @@ def test_Map(): # assert test_return[2] == matplotlib.collections.QuadMesh # assert test_return[3] == matplotlib.colorbar.Colorbar + + # Test Map with VarMultiDimBinning + binning_2d = MultiDimBinning([dict(name='reco_energy', is_log=True, + num_bins=12, domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, + num_bins=10, domain=[-1, 0.]) + ]) + binning_3d = MultiDimBinning([dict(name='reco_energy', is_log=True, + num_bins=12, domain=[5., 80.], units='GeV'), + dict(name='reco_coszen', is_lin=True, + num_bins=10, domain=[-1, 0.]), + dict(name='pid', is_lin=True, + bin_edges=[0.0, 0.25, 0.55, 1.0], + bin_names=['cascades', 'mixed','tracks']) + ]) + vb = VarMultiDimBinning([EventSpecie(name='high_ndom', selection='n_hit_doms>20', + binning=binning_3d), + EventSpecie(name='low_ndom', selection='n_hit_doms<20', + binning=binning_2d), + ]) + + hist_vb = [np.ones(sp) for sp in vb.shape] + mvb = Map(name='a', hist=hist_vb, error_hist=hist_vb, binning=vb) + + mvb.set_poisson_errors() + mvb.set_errors([3*np.ones(sp) for sp in vb.shape]) + _ = mvb.shape + _ = mvb.size + _ = mvb.num_entries + + mvb_copy = deepcopy(mvb) + assert mvb == mvb_copy + assert mvb*2 != mvb_copy + assert mvb.sqrt()**2 == mvb_copy + + _ = mvb+mvb_copy + _ = mvb-mvb_copy + _ = mvb*mvb_copy + _ = mvb/mvb_copy + logging.info(str(('<< PASS : test_Map >>'))) From 659577e81f8de75d7d9b73ead3d3d01d926ea13b Mon Sep 17 00:00:00 2001 From: Maria Date: Tue, 5 Nov 2024 02:53:56 -0600 Subject: [PATCH 07/10] clean up pipeline and container; allow for better implenetation of variable binning --- pisa/core/container.py | 18 ++++++------------ pisa/core/pipeline.py | 8 +++----- pisa/stages/utils/hist.py | 21 ++------------------- 3 files changed, 11 insertions(+), 36 deletions(-) diff --git a/pisa/core/container.py b/pisa/core/container.py index 24adb575a..1590fdf6e 100644 --- a/pisa/core/container.py +++ b/pisa/core/container.py @@ -33,7 +33,7 @@ class ContainerSet(object): containers : list or None - data_specs : MultiDimBinning, VarMultiDimBinning, "events" or None + data_specs : MultiDimBinning, VarMultiDimBinning (partial support), "events" or None """ def __init__(self, name, containers=None, data_specs=None): @@ -306,10 +306,8 @@ def representation(self, representation): for name in representation.names: self.validity[name][key] = True elif isinstance(representation, VarMultiDimBinning): -# self.representation = 'events' for bn in representation.binnings: for bn_name in bn.names: -# print(bn_name, key) self.validity[bn_name][key] = True elif isinstance(representation, str): if representation not in self.array_representations: @@ -501,14 +499,11 @@ def get_hist(self, key): """Return reshaped data as normal n-dimensional histogram""" assert self.is_map, 'Cannot retrieve hists from non-map data' - # retrieve in case needs translation (except VarMultiDimBinning) -# print(key, self.keys) - if not isinstance(self.representation, VarMultiDimBinning): - self[key] + # retrieve in case needs translation + self[key] binning = self.representation data = self[key] - print(binning, data) if isinstance(binning, VarMultiDimBinning): assert len(binning.species) == len(data) @@ -561,8 +556,8 @@ def translate(self, key, src_representation): # nothing to do return - from_map = isinstance(src_representation, (MultiDimBinning,VarMultiDimBinning)) - to_map = isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning)) + from_map = isinstance(src_representation, MultiDimBinning) + to_map = isinstance(dest_representation, MultiDimBinning) if self.tranlation_modes[key] == 'average': if from_map and to_map: @@ -664,10 +659,9 @@ def array_to_binned(self, key, src_representation, dest_representation, averaged """ # TODO: make work for n-dim logging.trace('Transforming %s array to binned data'%(key)) -# print('calling container.array_to_binned() method') assert src_representation in self.array_representations - assert isinstance(dest_representation, (MultiDimBinning,VarMultiDimBinning)) + assert isinstance(dest_representation, MultiDimBinning) if not dest_representation.is_irregular: sample = [] diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index d68a364a0..52c2badf3 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -318,8 +318,6 @@ def _init_stages(self): def get_outputs(self, output_binning=None, output_key=None): """Get MapSet output""" - - self.run() if output_binning is None: @@ -330,9 +328,9 @@ def get_outputs(self, output_binning=None, output_key=None): assert output_binning is not None - # TODO: figure out how this is messing up variable binning!!! -# self.data.representation = output_binning -# print(self.data.keys) + # avoid unnecessary translation + if self.data.representation != output_binning: + self.data.representation = output_binning if isinstance(output_key, tuple): assert len(output_key) == 2 diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py index bb6aa5585..a1c8bdab8 100644 --- a/pisa/stages/utils/hist.py +++ b/pisa/stages/utils/hist.py @@ -48,9 +48,6 @@ def setup_function(self): "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode ) -# for container in self.data: -# print('initial', container.keys) - self.variable_binning = False if isinstance(self.apply_mode, VarMultiDimBinning): self.variable_binning = True @@ -134,14 +131,12 @@ def setup_function(self): ) else: # TODO: deal with repeat code - print('calculating using var binning!!!') species = [] for specie in self.apply_mode.species: dimensions = [] for dim in specie.binning: if dim.is_irregular: varname = dim.name + "__" + specie.name + "_" + "_idx" - print(varname) new_dim = OneDimBinning( varname, domain=[0, dim.num_bins], num_bins=dim.num_bins ) @@ -170,15 +165,9 @@ def setup_function(self): else: raise ValueError(f"unknown calc mode: {self.calc_mode}") -# for container in self.data: -# print('setup_final', container.keys) - @profile def apply_function(self): -# for container in self.data: -# print('apply_initial', container.keys) - if isinstance(self.calc_mode, MultiDimBinning): if self.unweighted: @@ -212,9 +201,7 @@ def apply_function(self): elif self.calc_mode == "events": for container in self.data: -# print(container.keys) container.representation = self.calc_mode - print(container.keys) hists = [] errors = [] bin_unc2s = [] @@ -225,13 +212,11 @@ def apply_function(self): sel_mask = np.ones_like(container["weights"]) for var_name in container.keys: #TODO add check that masks don't overlap # using word boundary \b to replace whole words only - print(type(var_name), var_name) selection = re.sub( r'\b{}\b'.format(var_name), 'container["%s"]' % (var_name), selection ) - print(selection) sel_mask = eval(selection) # pylint: disable=eval-used sample = [] @@ -273,18 +258,16 @@ def apply_function(self): MultiDimBinning(reg_specie.binning), averaged=False ) -# print(hist) hists.append(hist) if self.error_method == "sumw2": errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights), reg_specie.binning, averaged=False)) ) - bin_unc2s.append(histogram(sample, np.square(unc_weights)*weights, - reg_specie.binning, averaged=False)) + bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights, + reg_specie.binning, averaged=False) ) container.representation = self.apply_mode container["weights"] = self.apply_mode, hists - print(container) if self.error_method == "sumw2": container["errors"] = self.apply_mode, errors From e07b0469288c695a1e0da6d6a750d2ba31a0b15d Mon Sep 17 00:00:00 2001 From: Maria Date: Sat, 9 Nov 2024 19:37:48 -0600 Subject: [PATCH 08/10] fix MultiDimBinning handling in hist stage (a bit of duplicate code for now but leaving this for later); also fix selection=None handling in config_parser --- pisa/stages/utils/hist.py | 145 +++++++++++++++++++++++++----------- pisa/utils/config_parser.py | 3 +- 2 files changed, 103 insertions(+), 45 deletions(-) diff --git a/pisa/stages/utils/hist.py b/pisa/stages/utils/hist.py index a1c8bdab8..05e3a86fa 100644 --- a/pisa/stages/utils/hist.py +++ b/pisa/stages/utils/hist.py @@ -124,8 +124,7 @@ def setup_function(self): dimensions.append(dim) # simplify the code by making it a VarMultiDimBinning ... # self.regularized_apply_mode = MultiDimBinning(dimensions) - self.regularized_apply_mode = VarMultiDimBinning([EventSpecie( - binning=MultiDimBinning(dimensions))]) + self.regularized_apply_mode = MultiDimBinning(dimensions) logging.debug( "Using regularized binning:\n" + str(self.regularized_apply_mode) ) @@ -200,53 +199,37 @@ def apply_function(self): container["bin_unc2"] = bin_unc2 elif self.calc_mode == "events": - for container in self.data: - container.representation = self.calc_mode - hists = [] - errors = [] - bin_unc2s = [] - for specie, reg_specie in zip(self.apply_mode.species, - self.regularized_apply_mode): - selection = specie.selection - if selection is None: - sel_mask = np.ones_like(container["weights"]) - for var_name in container.keys: #TODO add check that masks don't overlap - # using word boundary \b to replace whole words only - selection = re.sub( - r'\b{}\b'.format(var_name), - 'container["%s"]' % (var_name), - selection - ) - sel_mask = eval(selection) # pylint: disable=eval-used - + if not self.variable_binning: + for container in self.data: + container.representation = self.calc_mode + sample = [] - dims_log = [d.is_log for d in specie.binning] - dims_ire = [d.is_irregular for d in specie.binning] + dims_log = [d.is_log for d in self.apply_mode] + dims_ire = [d.is_irregular for d in self.apply_mode] for dim, is_log, is_ire in zip( - reg_specie.binning, dims_log, dims_ire + self.regularized_apply_mode, dims_log, dims_ire ): - if is_log and not is_ire: container.representation = "log_events" - sample.append(container[dim.name][sel_mask]) + sample.append(container[dim.name]) else: container.representation = "events" - sample.append(container[dim.name][sel_mask]) + sample.append(container[dim.name]) if self.unweighted: if "astro_weights" in container.keys: weights = np.ones_like( - container["weights"][sel_mask] + container["astro_weights"][sel_mask] + container["weights"] + container["astro_weights"] ) else: - weights = np.ones_like(container["weights"][sel_mask]) + weights = np.ones_like(container["weights"]) else: if "astro_weights" in container.keys: - weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask] + weights = container["weights"] + container["astro_weights"] else: - weights = container["weights"][sel_mask] + weights = container["weights"] if self.apply_unc_weights: - unc_weights = container["unc_weights"][sel_mask] + unc_weights = container["unc_weights"] else: unc_weights = np.ones(weights.shape) @@ -254,21 +237,95 @@ def apply_function(self): # and regular hist = histogram( sample, - (unc_weights*weights), - MultiDimBinning(reg_specie.binning), + unc_weights*weights, + self.regularized_apply_mode, averaged=False ) - hists.append(hist) if self.error_method == "sumw2": - errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights), - reg_specie.binning, averaged=False)) ) - bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights, - reg_specie.binning, averaged=False) ) + sumw2 = histogram(sample, np.square(unc_weights*weights), + self.regularized_apply_mode, averaged=False) + bin_unc2 = histogram(sample, np.square(unc_weights)*weights, + self.regularized_apply_mode, averaged=False) - container.representation = self.apply_mode - container["weights"] = self.apply_mode, hists + container.representation = self.apply_mode + container["weights"] = hist - if self.error_method == "sumw2": - container["errors"] = self.apply_mode, errors - container["bin_unc2"] = self.apply_mode, bin_unc2s + if self.error_method == "sumw2": + container["errors"] = np.sqrt(sumw2) + container["bin_unc2"] = bin_unc2 + else: + for container in self.data: + container.representation = self.calc_mode + hists = [] + errors = [] + bin_unc2s = [] + for specie, reg_specie in zip(self.apply_mode.species, + self.regularized_apply_mode): + selection = specie.selection + + if selection is None: + sel_mask = np.full(container["weights"].shape, True) + else: + for var_name in container.keys: #TODO add check that masks don't overlap + # using word boundary \b to replace whole words only + selection = re.sub( + r'\b{}\b'.format(var_name), + 'container["%s"]' % (var_name), + selection + ) + sel_mask = eval(selection) # pylint: disable=eval-used + + sample = [] + dims_log = [d.is_log for d in specie.binning] + dims_ire = [d.is_irregular for d in specie.binning] + for dim, is_log, is_ire in zip( + reg_specie.binning, dims_log, dims_ire + ): + + if is_log and not is_ire: + container.representation = "log_events" + sample.append(container[dim.name][sel_mask]) + else: + container.representation = "events" + sample.append(container[dim.name][sel_mask]) + + if self.unweighted: + if "astro_weights" in container.keys: + weights = np.ones_like( + container["weights"][sel_mask] + container["astro_weights"][sel_mask] + ) + else: + weights = np.ones_like(container["weights"][sel_mask]) + else: + if "astro_weights" in container.keys: + weights = container["weights"][sel_mask] + container["astro_weights"][sel_mask] + else: + weights = container["weights"][sel_mask] + if self.apply_unc_weights: + unc_weights = container["unc_weights"][sel_mask] + else: + unc_weights = np.ones(weights.shape) + + # The hist is now computed using a binning that is completely linear + # and regular + hist = histogram( + sample, + (unc_weights*weights), + MultiDimBinning(reg_specie.binning), + averaged=False + ) + hists.append(hist) + + if self.error_method == "sumw2": + errors.append( np.sqrt(histogram(sample, np.square(unc_weights*weights), + reg_specie.binning, averaged=False)) ) + bin_unc2s.append( histogram(sample, np.square(unc_weights)*weights, + reg_specie.binning, averaged=False) ) + + container.representation = self.apply_mode + container["weights"] = self.apply_mode, hists + + if self.error_method == "sumw2": + container["errors"] = self.apply_mode, errors + container["bin_unc2"] = self.apply_mode, bin_unc2s diff --git a/pisa/utils/config_parser.py b/pisa/utils/config_parser.py index 1ef2b8db8..d9d529ee2 100644 --- a/pisa/utils/config_parser.py +++ b/pisa/utils/config_parser.py @@ -608,7 +608,6 @@ def parse_pipeline_config(config): "Could not find 'binning'. Only found sections: %s" % config.sections() ) - # Create binning objects binning_dict = {} # Loop over binning lines @@ -683,6 +682,8 @@ def parse_pipeline_config(config): "should be defined after MultiDimBinnings used in their definition.", binning ) + if sel.strip() == 'None': + sel = None species.append(EventSpecie(name=specie_name, selection=sel, binning=binning_dict[bin_def])) binning_dict[binning] = VarMultiDimBinning(name=binning, event_species=species) From 4a5fae50e6cc3fc1a661d06f2200ba5b061b32bd Mon Sep 17 00:00:00 2001 From: Maria Date: Sat, 9 Nov 2024 20:14:45 -0600 Subject: [PATCH 09/10] fix typo --- pisa/core/binning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pisa/core/binning.py b/pisa/core/binning.py index 88e5e6b01..114cce11d 100644 --- a/pisa/core/binning.py +++ b/pisa/core/binning.py @@ -3389,7 +3389,7 @@ def __getitem__(self, index): raise ValueError( "Dimension '%s': bin index %s is invalid. Bin index" " must be >= %+d and <= %+d" - %(self.name, bin_index, -mylen, mylen-1) + %(self.name, sp_index, -mylen, mylen-1) ) new_species.append(deepcopy(self._species[sp_index])) else: From c2a89f3cad0f07ce02216d3f958b434b1eed84d1 Mon Sep 17 00:00:00 2001 From: Maria Date: Mon, 11 Nov 2024 18:59:45 -0600 Subject: [PATCH 10/10] fix leftover old var binning flag in map --- pisa/core/map.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pisa/core/map.py b/pisa/core/map.py index 14237d52f..187564f1b 100755 --- a/pisa/core/map.py +++ b/pisa/core/map.py @@ -1681,7 +1681,7 @@ def llh(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.llh(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.llh(actual_values=hi, expected_values=exp_val_hi))\ @@ -1715,7 +1715,7 @@ def mcllh_mean(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.mcllh_mean(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.mcllh_mean(actual_values=hi, expected_values=exp_val_hi))\ @@ -1750,7 +1750,7 @@ def mcllh_eff(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.mcllh_eff(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.mcllh_eff(actual_values=hi, expected_values=exp_val_hi))\ @@ -1784,7 +1784,7 @@ def conv_llh(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.conv_llh(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.conv_llh(actual_values=hi, expected_values=exp_val_hi))\ @@ -1824,7 +1824,7 @@ def barlow_llh(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.barlow_llh(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.barlow_llh(actual_values=hi, expected_values=exp_val_hi))\ @@ -1859,7 +1859,7 @@ def mod_chi2(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.mod_chi2(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.mod_chi2(actual_values=hi, expected_values=exp_val_hi))\ @@ -1893,7 +1893,7 @@ def correct_chi2(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.correct_chi2(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.correct_chi2(actual_values=hi, expected_values=exp_val_hi))\ @@ -1927,7 +1927,7 @@ def chi2(self, expected_values, binned=False): expected_values=exp_val_hi)\ for hi, exp_val_hi in zip(self.hist, expected_values)] - if is_not_vb: + if not self.variable_binning: return np.sum(stats.chi2(actual_values=self.hist, expected_values=expected_values)) return np.sum([np.sum(stats.chi2(actual_values=hi, expected_values=exp_val_hi))\