diff --git a/.travis.yml b/.travis.yml index 64dacb89a93..58de9dfc00c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ cache: language: python python: - - "2.7" - "3.6" env: diff --git a/cmsl1t/__init__.py b/cmsl1t/__init__.py index 910dca48cd5..3cd437f6817 100644 --- a/cmsl1t/__init__.py +++ b/cmsl1t/__init__.py @@ -1,7 +1,8 @@ from __future__ import absolute_import +import logging import os from os import path -import logging + __version__ = '0.5.1' @@ -22,8 +23,8 @@ logger.addHandler(ch) if 'PROJECT_ROOT' not in os.environ: - logger.warn("Could not find environmental variable 'PROJECT_ROOT'") - logger.warn("You should to run 'source setup.sh' first!") + logger.warning("Could not find environmental variable 'PROJECT_ROOT'") + logger.warning("You should to run 'source setup.sh' first!") HERE = path.dirname(path.abspath(__file__)) PROJECT_ROOT = path.abspath(path.join(HERE, path.pardir)) else: diff --git a/cmsl1t/analyzers/demo_analyzer.py b/cmsl1t/analyzers/demo_analyzer.py index a8ff18b0be6..3af964fea69 100644 --- a/cmsl1t/analyzers/demo_analyzer.py +++ b/cmsl1t/analyzers/demo_analyzer.py @@ -6,7 +6,7 @@ import numpy as np from .BaseAnalyzer import BaseAnalyzer -from cmsl1t.collections import EfficiencyCollection +from cmsl1t.collections import EfficiencyCollection, VectorizedHistCollection class Analyzer(BaseAnalyzer): @@ -14,25 +14,30 @@ class Analyzer(BaseAnalyzer): def __init__(self, **kwargs): super(Analyzer, self).__init__(**kwargs) - self.met_calcs = dict( - RecalcL1EmuMETNot28=dict( + self.met_calcs = { + self.name + '_' + 'RecalcL1EmuMETNot28': dict( title="Emulated MET, |ieta|<28", attr='l1MetNot28'), - RecalcL1EmuMETNot28HF=dict( + self.name + '_' + 'RecalcL1EmuMETNot28HF': dict( title="Emulated MET, |ieta|!=28", attr='l1MetNot28HF'), - ) + } def prepare_for_events(self, reader): bins = np.arange(0, 200, 25) thresholds = [70, 90, 110] puBins = list(range(0, 50, 10)) + [999] + self.hists = VectorizedHistCollection(innerBins=puBins, innerLabel='pu') + self.efficiencies = EfficiencyCollection(pileupBins=puBins) add_met_variable = partial( self.efficiencies.add_variable, bins=bins, thresholds=thresholds) list(map(add_met_variable, self.met_calcs)) + + for met, config in self.met_calcs.items(): + self.hists.insert(met, bins=bins, title=config['title']) return True def reload_histograms(self, input_file): @@ -43,16 +48,19 @@ def reload_histograms(self, input_file): def fill_histograms(self, entry, event): pileup = event['Vertex_nVtx'] self.efficiencies.set_pileup(pileup) + self.hists.inner_fill(pileup) offlineMetBE = event.Sums_caloMetBE for name, config in self.met_calcs.items(): onlineMet = event[config['attr']] onlineMet = onlineMet.mag self.efficiencies.fill_array(name, offlineMetBE, onlineMet) + self.hists[pileup][name].fill(offlineMetBE) return True def write_histograms(self): - self.efficiencies.to_root(self.get_histogram_filename()) + self.efficiencies.to_root(self.get_histogram_filename().replace('.root', '_efficiencies.root')) + self.hists.to_root(self.get_histogram_filename()) return True def make_plots(self): diff --git a/cmsl1t/analyzers/jetMet_analyzer.py b/cmsl1t/analyzers/jetMet_analyzer.py index d0f963032eb..a04f1ebf580 100644 --- a/cmsl1t/analyzers/jetMet_analyzer.py +++ b/cmsl1t/analyzers/jetMet_analyzer.py @@ -119,7 +119,7 @@ def __init__(self, **kwargs): lumiMuDict = dict() run_lumi_csv = os.path.join(cmsl1t.PROJECT_ROOT, 'run_lumi.csv') - with open(run_lumi_csv) as runLumiFile: + with open(run_lumi_csv, 'rb') as runLumiFile: reader = csv.reader(runLumiFile, delimiter=',') for line in reader: lumiMuDict[(int(line[1]), int(line[2]))] = float(line[3]) @@ -361,8 +361,8 @@ def fill_histograms(self, entry, event): # pileup = self._lumiMu[(event['run'], event['lumi'])] pileup = 51 # print pileup - if pileup >= 60 or pileup < 50: - return True + # if pileup >= 60 or pileup < 50: + # return True for name in self._sumTypes: if 'pfMET' in name and not pfMetFilter(event): diff --git a/cmsl1t/collections/__init__.py b/cmsl1t/collections/__init__.py index daab1251d3d..30722f35d3e 100644 --- a/cmsl1t/collections/__init__.py +++ b/cmsl1t/collections/__init__.py @@ -5,10 +5,12 @@ from .by_pileup import HistogramsByPileUpCollection from .resolution import ResolutionCollection from .efficiency import EfficiencyCollection +from .vectorized import VectorizedHistCollection __all__ = [ 'BaseHistCollection', 'HistogramsByPileUpCollection', 'ResolutionCollection', 'EfficiencyCollection', + 'VectorizedHistCollection', ] diff --git a/cmsl1t/collections/base.py b/cmsl1t/collections/base.py index 479ff9fd8b3..4b355d61199 100644 --- a/cmsl1t/collections/base.py +++ b/cmsl1t/collections/base.py @@ -20,10 +20,10 @@ logger = logging.getLogger(__name__) -def create_n_dim_dict(dimensions, initiaValue=0): +def create_n_dim_dict(dimensions, initialValue=0): if dimensions < 1: - return initiaValue - factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue) + return initialValue + factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue) return defaultdict(factory) @@ -40,20 +40,20 @@ def create_n_dim_dict(dimensions, initiaValue=0): def len_n_dim_dict(dictionary, dimensions): if dimensions <= 1: - return len(dictionary) + return len(dictionary.keys()) return sum(len_n_dim_dict(v, dimensions - 1) for v in six.itervalues(dictionary)) class BaseHistCollection(defaultdict): - def __init__(self, dimensions, initiaValue=0): + def __init__(self, dimensions, initialValue=0): ''' For each dimension create a dictionary ''' # TODO: add possibility for different lambda expresions for each # dimension. This will allow to have custom dicts in certain dimensions - factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue) + factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue) if sys.version_info[0] < 3: defaultdict.__init__(self, factory) else: diff --git a/cmsl1t/collections/by_pileup.py b/cmsl1t/collections/by_pileup.py index 42d9f2c15f7..e5c6011182b 100644 --- a/cmsl1t/collections/by_pileup.py +++ b/cmsl1t/collections/by_pileup.py @@ -39,7 +39,7 @@ def add(self, hist_name, bins=[]): 'No bins specified for histogram {0}'.format(hist_name)) if hist_name in self[self._pileupBins[0]].keys(): - logger.warn('Histogram {0} already exists!'.format(hist_name)) + logger.warning('Histogram {0} already exists!'.format(hist_name)) return hist_names = [] add_name = hist_names.append diff --git a/cmsl1t/collections/efficiency.py b/cmsl1t/collections/efficiency.py index f897c7d1f93..e4d05e9b578 100644 --- a/cmsl1t/collections/efficiency.py +++ b/cmsl1t/collections/efficiency.py @@ -100,7 +100,7 @@ def add_variable(self, variable, bins, thresholds): """ # TODO: this will no longer work since 1st dimension is pileup if variable in self.keys(): - logger.warn('Variable {0} already exists!') + logger.warning('Variable {0} already exists!') return self._thresholds[variable] = thresholds hist_names = [] @@ -123,7 +123,7 @@ def fill(self, hist_name, recoValue, l1Value, w=1.0): logger.error('Histogram {0} does not exist'.format(hist_name)) return if hist_name not in self._thresholds: - logger.warn('No valid current thresholds.') + logger.warning('No valid current thresholds.') for threshold in self._thresholds[hist_name]: h[threshold].fill(recoValue, l1Value, w) @@ -136,7 +136,7 @@ def fill_array(self, hist_name, recoValue, l1Value, w=None): logger.error('Histogram {0} does not exist'.format(hist_name)) return if hist_name not in self._thresholds: - logger.warn('No valid current thresholds.') + logger.warning('No valid current thresholds.') for threshold in self._thresholds[hist_name]: h[threshold].fill_array(recoValue, l1Value, w) diff --git a/cmsl1t/collections/resolution.py b/cmsl1t/collections/resolution.py index 776ace482d2..acad3b9b8e8 100644 --- a/cmsl1t/collections/resolution.py +++ b/cmsl1t/collections/resolution.py @@ -55,7 +55,7 @@ def fill(self, hist_name, x, w=1.0): logger.error('Histogram {0} does not exist'.format(hist_name)) return if not self._currentRegions: - logger.warn( + logger.warning( 'No valid current regions. Did you set_region_by_eta()?') for region in self._currentRegions: h[region].fill(x, w) @@ -63,7 +63,7 @@ def fill(self, hist_name, x, w=1.0): def add_variable(self, variable, bins=[]): from rootpy.plotting import Hist if variable in self.keys(): - logger.warn('Variable {0} already exists!') + logger.warning('Variable {0} already exists!') return hist_names = [] add_name = hist_names.append diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py new file mode 100644 index 00000000000..31fea7e29f8 --- /dev/null +++ b/cmsl1t/collections/vectorized.py @@ -0,0 +1,171 @@ +import awkward +from collections import defaultdict +import logging +import numpy as np +import random +from rootpy.plotting import Hist + +from . import BaseHistCollection +from ..utils.iterators import pairwise +from ..io import to_root + +logger = logging.getLogger(__name__) + + +def extend(arr1, starts, stops): + repeat = stops - starts + return np.repeat(arr1, repeat, axis=0) + + +def split_input(inner_indices, x, w): + content = x + weights = w + if hasattr(x, 'starts'): + inner_indices = extend(inner_indices, x.starts, x.stops) + content = x.content + if hasattr(w, 'starts'): + weights = w.content + + if np.size(weights) < np.size(content) and hasattr(x, 'starts'): + weights = extend(weights, x.starts, x.stops) + + for u in np.unique(inner_indices): + mask = inner_indices == u + if not isinstance(mask, (tuple, list, np.ndarray, np.generic)): + mask = np.array(mask) + yield u, content[mask], weights[mask] + + +class VectorizedHistCollection(BaseHistCollection): + + def __init__(self, innerBins, innerLabel='inner', **kwargs): + # if we want to generalize to N dim, innerBins needs to be an array of innerBins + # TODO: last dimension should probably be a normal dictionary + dimensions = kwargs.pop('dimensions', 2) + self._name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10]) + self._execute_before_write = kwargs.pop('execute_before_write', []) + super(VectorizedHistCollection, self).__init__(dimensions) + + self._innerBins = innerBins + self._innerLabel = innerLabel + self._innerHist = Hist(innerBins, name=innerLabel + '_' + self._name) + + def __getitem__(self, key): + if not isinstance(key, (tuple, list, np.ndarray, np.generic)): + key = np.array(key) + real_keys = self._get_inner_indices(key) + return VectorizedBinProxy(self, real_keys) + # return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()] + + def _get_inner_indices(self, values): + ''' + Returns the pileup bin corresponding to the provided pileup value. + - bin 0 is underflow + - bin len(innerBins) is overflow + + :Example: + >>> hists = VectorizedHistCollection(innerBins=[0,10,15,20,30,999]) + >>> hists._get_inner_indices([1, 11, 1111]) # returns [1, 2, 6] + ''' + return np.digitize(values, self._innerBins) + + def insert(self, name, bins, hist_type=Hist, **kwargs): + title = kwargs.pop('title', name) + bins = np.asarray(bins) + if bins.size == 0: + logger.error( + 'No bins specified for histogram {0}'.format(name)) + + if name in super(VectorizedHistCollection, self).__getitem__(1): + logger.warning('Histogram {0} already exists!'.format(name)) + return + names = [] + add_name = names.append + + for i, hist_name in enumerate(self._create_hist_names(name)): + __current_slice = super(VectorizedHistCollection, self).__getitem__(i + 1) + if i + 1 not in self or hist_name not in __current_slice: + add_name(hist_name) + __current_slice[hist_name] = hist_type(bins, name=hist_name, title=title) + logger.debug('Created {0} histograms: {1}'.format( + len(names), ', '.join(names))) + + def _create_hist_names(self, name): + for lowerEdge, upperEdge in pairwise(self._innerBins): + yield f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}" + + def get_hist_name(self, name, innerIndex): + lowerEdge, upperEdge = self._innerBins[innerIndex - 1], self._innerBins[innerIndex] + return f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}" + + def inner_fill(self, x, w=None): + if w is None: + w = np.ones(np.size(x)) + self._innerHist.fill_array(x, w) + + def to_root(self, output_file): + for func in self._execute_before_write: + func(self) + to_root([self, self._innerHist], output_file) + + +class VectorizedBinProxy(object): + + def __init__(self, collection, inner_indices): + self.collection = collection + self._inner_indices = inner_indices + # self._inner_values = inner_values + + def __getitem__(self, key): + # TODO, if key != string, return a BinProxy of the bin above + return VectorizedHistProxy(self, key) + + def __add__(self, other): + if self.collection != other.collection: + msg = 'Cannot add VectorizedBinProxy for two different collections' + logger.error(msg) + raise ValueError(msg) + self._inner_indices = np.append(self._inner_indices, other._inner_indices) + return self + + def __eq__(self, other): + if self.collection != other.collection: + msg = 'Cannot compare VectorizedBinProxy for two different collections' + logger.error(msg) + raise ValueError(msg) + return self._inner_indices.tolist() == other._inner_indices.tolist() + + def flatten(self): + self._inner_indices = np.unique(self._inner_indices) + return self + + +class VectorizedHistProxy(object): + + def __init__(self, bin_proxy, hist_name): + self._bin_proxy = bin_proxy + self._hist_name = hist_name + + def _get_hist(self, inner_index): + hist_name = self._bin_proxy.collection.get_hist_name(self._hist_name, inner_index) + return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name] + + def fill(self, x, w=None): + if not isinstance(x, (tuple, list, np.ndarray, awkward.JaggedArray)): + x = np.array(x) + + if w is None: + n = np.size(x.content) if hasattr(x, 'content') else np.size(x) + w = np.ones(n) + for i, x_i, w_i in split_input(self._bin_proxy._inner_indices, x, w): + hist = self._get_hist(i) + hist.fill_array(x_i, w_i) + +# class VectorizedEfficiencyProxy(object): + +# def split_input(): +# a = np.array([1, 12, 1, 10, 50, 10]) +# b = np.array([10, 20, 30, 40, 50, 60]) +# arg = a.argsort(kind='stable') +# offsets, = np.where(np.r_[True, np.diff(a[arg]) > 0]) +# output = awkward.JaggedArray.fromoffsets(offsets.flatten(), awkward.IndexedArray(arg, b)) diff --git a/cmsl1t/config.py b/cmsl1t/config.py index ac154578ff0..300b936ef05 100644 --- a/cmsl1t/config.py +++ b/cmsl1t/config.py @@ -314,7 +314,7 @@ def reduce_scope_for_analyzer(self, analyzer_name): forbidden_local_settings = ['name', 'input_files'] for s in forbidden_local_settings: if s in analyzer: - logger.warn('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name)) + logger.warning('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name)) analyzer.pop(s) global_settings = dict( diff --git a/cmsl1t/playground/eventreader.py b/cmsl1t/playground/eventreader.py index 9b9480cdc94..987601796db 100644 --- a/cmsl1t/playground/eventreader.py +++ b/cmsl1t/playground/eventreader.py @@ -367,7 +367,7 @@ def __init__(self, files, events=-1, load_trees=['event', 'upgrade']): try: chain = TreeChain(path, input_files, cache=True, events=events) except RuntimeError: - logger.warn("Cannot find tree: {0} in input file".format(path)) + logger.warning("Cannot find tree: {0} in input file".format(path)) continue self._names.append(name) self._trees.append(chain) diff --git a/cmsl1t/playground/resolution.py b/cmsl1t/playground/resolution.py index a7969f4d2c1..0b76f0909bd 100644 --- a/cmsl1t/playground/resolution.py +++ b/cmsl1t/playground/resolution.py @@ -36,7 +36,7 @@ def add_hist_set(self, prefix, regions=geo.eta_regions, bins=[]): for region in regions: name = prefix + region if name in self._hists: - logger.warn('Overwriting existing histogram {0}'.format(name)) + logger.warning('Overwriting existing histogram {0}'.format(name)) del self._hists[name] logger.debug('Adding histogram {0}'.format(name)) self._hists[name] = Hist(bins, name=name) diff --git a/cmsl1t/producers/met.py b/cmsl1t/producers/met.py index ea494fc2aa4..658a317db7d 100644 --- a/cmsl1t/producers/met.py +++ b/cmsl1t/producers/met.py @@ -76,7 +76,7 @@ def __init__(self, inputs, outputs, **kwargs): self._method = Producer.METHODS[params['method']] else: msg = 'Could not find specified MET method, using default.' - logger.warn(msg) + logger.warning(msg) self._method = Producer.METHODS['default'] def produce(self, event): diff --git a/cmsl1t/producers/met_vectorized.py b/cmsl1t/producers/met_vectorized.py index 95ecaf03f38..573d2e45da2 100644 --- a/cmsl1t/producers/met_vectorized.py +++ b/cmsl1t/producers/met_vectorized.py @@ -79,7 +79,7 @@ def __init__(self, inputs, outputs, **kwargs): self._method = Producer.METHODS[params['method']] else: msg = 'Could not find specified MET method, using default.' - logger.warn(msg) + logger.warning(msg) self._method = Producer.METHODS['default'] def produce(self, event): diff --git a/config/demo.yaml b/config/demo.yaml index ceadd10ad1a..70037422dc2 100644 --- a/config/demo.yaml +++ b/config/demo.yaml @@ -52,6 +52,7 @@ analysis: outputs: - l1MetNot28HF method: l1MetNot28HF + filters: [] output: # template is a list here that is joined (os.path.join) in the config parser diff --git a/requirements.txt b/requirements.txt index 946323ba768..14aa53a4ea7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +aghast +git+https://github.com/scikit-hep/boost-histogram.git@develop numpy matplotlib pandas==0.23 diff --git a/test/collections/test_baseHistcollection.py b/test/collections/test_baseHistcollection.py index 783c75555df..1807aa8ab60 100644 --- a/test/collections/test_baseHistcollection.py +++ b/test/collections/test_baseHistcollection.py @@ -1,6 +1,8 @@ -from cmsl1t.collections import BaseHistCollection -import unittest from collections import defaultdict +import pytest +import unittest + +from cmsl1t.collections import BaseHistCollection class TestBaseHistCollection(unittest.TestCase): @@ -9,6 +11,7 @@ def test_dimensions(self): dimensions = 4 initial_value = 0 hists = BaseHistCollection(dimensions, initial_value) + self.assertEqual(len(hists), 0) self.assertIs(type(hists[1]), defaultdict) self.assertIs(type(hists[1][2][3][4]), type(initial_value)) self.assertEqual(hists[1][2][3][4], initial_value) @@ -23,3 +26,9 @@ def test_dimensions(self): # length_from_iterator = len(list(six.itervalues(hists))) # self.assertEqual(length_from_iterator, 3) + + +@pytest.mark.parametrize("dimensions", [1, 2, 3]) +def test_empty(dimensions): + c = BaseHistCollection(dimensions) + assert len(c) == 0 diff --git a/test/collections/test_boost_histogram.py b/test/collections/test_boost_histogram.py new file mode 100644 index 00000000000..6b30dd0e176 --- /dev/null +++ b/test/collections/test_boost_histogram.py @@ -0,0 +1,31 @@ +# import aghast +import awkward +import boost.histogram as bh +import numpy as np + + +def test_fill(): + pileup_bins = [0, 10, 15, 20, 30, 999] + jet_pt_bins = [35, 90, 120] + hist = bh.histogram( + bh.axis.variable(pileup_bins), + bh.axis.variable(jet_pt_bins, bh.storage.weight()), + ) + + ets = awkward.fromiter([ + np.random.poisson(30, 5), + np.random.poisson(30, 2), + np.random.poisson(30, 3), + ]) + repeat = ets.stops - ets.starts + + weights = np.ones(len(ets)) + weights = np.repeat(weights, repeat, axis=0) + pileup = np.random.poisson(50, len(ets)) + pileup = np.repeat(pileup, repeat, axis=0) + # expand pileup to size ets + assert len(pileup) == len(ets.content) + # weights are not yet supported + # hist.fill(pileup, ets.content, bh.weight(weights)) + hist.fill(pileup, ets.content) + # hist(pileup, ets.content) diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py new file mode 100644 index 00000000000..13e6c17260a --- /dev/null +++ b/test/collections/test_vectorized.py @@ -0,0 +1,158 @@ +import awkward +import pytest +import numpy as np + +from cmsl1t.collections import VectorizedHistCollection +from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy, extend, split_input + + +@pytest.fixture +def scalarBins(): + return [0, 10, 15, 20, 30, 999] + + +@pytest.fixture +def collection(scalarBins): + coll = VectorizedHistCollection(scalarBins) + return coll + + +@pytest.fixture +def scalarDistribution(): + return [1, 12, 1, 50] + + +@pytest.fixture +def vectorDistribution(): + return awkward.fromiter([ + [60, 50, 40, 30, 20], + [32, 23], + [56, 34, 31], + [], + ]) + + +@pytest.fixture(params=['event_weights', 'vector_weights', 'flat_vector_weights']) +def weights(vectorDistribution, request): + if request.param == 'event_weights': + return np.ones(np.size(vectorDistribution)) + if request.param == 'vector_weights': + return awkward.JaggedArray.fromoffsets( + (vectorDistribution.starts, vectorDistribution.stops), + np.ones(np.size(vectorDistribution.content)) + ) + return np.ones(np.size(vectorDistribution.content)) + + +@pytest.mark.parametrize( + "values,expected", + [ + ([1, 12, 1, 50], [1, 2, 1, 5]), + ([1, 11, 1111], [1, 2, 6]), + ([-10, 1111, 20], [0, 6, 4]), + ]) +def test_inner_index(collection, values, expected): + np.testing.assert_array_equal(collection._get_inner_indices(values), expected) + + +def test_add(collection): + assert len(collection) == 0 + collection.insert('test', bins=[35, 90, 120]) + assert len(collection) == len(collection._innerBins) - 1 + + +def test_access(collection): + collection.insert('test', bins=[35, 90, 120]) + values = [1, 12, 1, 50] + assert collection[values] == collection[1] + collection[12] + collection[1] + collection[50] + # assert type(collection[innerValues]) == Hist + assert type(collection[values]['test']) == VectorizedHistProxy + + +# def test_copy(collection): +# proxy = VectorizedBinProxy(collection, [1, 12, 1, 50]) + + +@pytest.mark.parametrize( + "values,expected", + [ + ([1, 12, 1, 50], [1, 12, 50]), + ([1, 30, 12, 1, 50], [1, 12, 30, 50]), + ]) +def test_bin_proxy_flatten(collection, values, expected): + proxy = VectorizedBinProxy(collection, values) + assert proxy.flatten()._inner_indices.tolist() == expected + + +@pytest.mark.parametrize( + "bins, x, expected", + [ + ( + np.array([1, 12, 1, 50]), + np.array([10, 20, 30, 40]), + [np.array([10, 30]), np.array([20]), np.array([40])] + ), + ( + np.array([1, 1, 1, 2, 1, 2]), + np.array([10, 20, 30, 40, 50, 60]), + [np.array([10, 20, 30, 50]), np.array([40, 60])] + ), + ]) +def test_split(bins, x, expected): + unique_bins = np.unique(bins) + result = [] + for b in unique_bins: + result.append(x[bins == b]) + for chunk, exp in zip(result, expected): + assert chunk.tolist() == exp.tolist() + + +def test_fill(collection, scalarDistribution, vectorDistribution, weights): + expected = [ + [4.0, 4.0, 0.0, 0.0], + [2.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ] + hist_name = 'test' + collection.insert(hist_name, bins=[35, 90, 120]) + # event_weights = np.ones(np.size(vectorDistribution.content)) + collection[scalarDistribution][hist_name].fill(vectorDistribution, weights) + for i in range(len(np.unique(scalarDistribution))): + hist = collection[scalarDistribution][hist_name]._get_hist(i + 1) + assert list(hist.y(overflow=True)) == expected[i] + + +def test_extend(): + innerValues = [1, 12, 1, 50] + outerValues = awkward.fromiter([ + [60, 50, 40, 30, 20], + [32, 23], + [56, 34, 31], + [], + ]) + innerValues = extend(innerValues, outerValues.starts, outerValues.stops) + assert len(innerValues) == len(outerValues.content) + + +def test_split_input(): + innerValues = [1, 12, 1, 50] + outerValues = awkward.fromiter([ + [60, 50, 40, 30, 20], + [32, 23], + [56, 34, 31], + [], + ]) + weights = np.ones(len(outerValues.content)) + + expected = [ + (1, [60, 50, 40, 30, 20, 56, 34, 31], list(np.ones(8))), + [12, [32, 23], list(np.ones(2))], + ] + results = list(split_input(innerValues, outerValues, weights)) + assert len(results) == len(expected) + for r, e in zip(results, expected): + i, o, w = r + i_e, o_e, w_e = e + assert i == i_e + assert o.tolist() == o_e + assert w.tolist() == w_e