From 4f90f39a259264a9e69de5fffc2998a9d9a626fa Mon Sep 17 00:00:00 2001
From: hwtest <hwtest@greg-ttl.dyndns.cern.ch>
Date: Wed, 10 Jul 2019 15:19:42 +0200
Subject: [PATCH 01/30] Remove pileup selection

---
 cmsl1t/analyzers/jetMet_analyzer.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/cmsl1t/analyzers/jetMet_analyzer.py b/cmsl1t/analyzers/jetMet_analyzer.py
index d0f963032eb..eb248b56518 100644
--- a/cmsl1t/analyzers/jetMet_analyzer.py
+++ b/cmsl1t/analyzers/jetMet_analyzer.py
@@ -357,12 +357,10 @@ def fill_histograms(self, entry, event):
         if self._doGen:
             genNVtx = event.Generator_nVtx
 
-        # TODO: vectorize
-        # pileup = self._lumiMu[(event['run'], event['lumi'])]
-        pileup = 51
+        pileup = self._lumiMu[(event['run'], event['lumi'])]
         # print pileup
-        if pileup >= 60 or pileup < 50:
-            return True
+        # if pileup >= 60 or pileup < 50:
+        #    return True
 
         for name in self._sumTypes:
             if 'pfMET' in name and not pfMetFilter(event):

From 13fc486241f6faa93ff3e77fcb1d658fa1c58958 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 3 May 2019 10:02:55 +0100
Subject: [PATCH 02/30] fixed pep8

---
 cmsl1t/analyzers/jetMet_analyzer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmsl1t/analyzers/jetMet_analyzer.py b/cmsl1t/analyzers/jetMet_analyzer.py
index eb248b56518..0b352b0579c 100644
--- a/cmsl1t/analyzers/jetMet_analyzer.py
+++ b/cmsl1t/analyzers/jetMet_analyzer.py
@@ -119,7 +119,7 @@ def __init__(self, **kwargs):
 
         lumiMuDict = dict()
         run_lumi_csv = os.path.join(cmsl1t.PROJECT_ROOT, 'run_lumi.csv')
-        with open(run_lumi_csv) as runLumiFile:
+        with open(run_lumi_csv, 'rb') as runLumiFile:
             reader = csv.reader(runLumiFile, delimiter=',')
             for line in reader:
                 lumiMuDict[(int(line[1]), int(line[2]))] = float(line[3])

From b9bbcba8453c91a3eafc4681a0a5bf4c4dbd3e0f Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 31 May 2019 14:58:49 +0100
Subject: [PATCH 03/30] added vectorized version of all2017.yml

---
 cmsl1t/analyzers/jetMet_analyzer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cmsl1t/analyzers/jetMet_analyzer.py b/cmsl1t/analyzers/jetMet_analyzer.py
index 0b352b0579c..a04f1ebf580 100644
--- a/cmsl1t/analyzers/jetMet_analyzer.py
+++ b/cmsl1t/analyzers/jetMet_analyzer.py
@@ -357,7 +357,9 @@ def fill_histograms(self, entry, event):
         if self._doGen:
             genNVtx = event.Generator_nVtx
 
-        pileup = self._lumiMu[(event['run'], event['lumi'])]
+        # TODO: vectorize
+        # pileup = self._lumiMu[(event['run'], event['lumi'])]
+        pileup = 51
         # print pileup
         # if pileup >= 60 or pileup < 50:
         #    return True

From b5732c7385d54e15e8145a40a939567f2d920991 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Tue, 9 Jul 2019 10:44:59 +0100
Subject: [PATCH 04/30] added test for boost histogram

---
 test/collections/test_boost_histogram.py | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 test/collections/test_boost_histogram.py

diff --git a/test/collections/test_boost_histogram.py b/test/collections/test_boost_histogram.py
new file mode 100644
index 00000000000..183d06d02ea
--- /dev/null
+++ b/test/collections/test_boost_histogram.py
@@ -0,0 +1,29 @@
+# import aghast
+import awkward
+import boost.histogram as bh
+import numpy as np
+
+
+def test_fill():
+    pileup_bins = [0, 10, 15, 20, 30, 999]
+    jet_pt_bins = [35, 90, 120]
+    hist = bh.histogram(
+        bh.axis.variable(pileup_bins),
+        bh.axis.variable(jet_pt_bins, bh.storage.weight()),
+    )
+
+    ets = awkward.fromiter([
+        np.random.poisson(30, 5),
+        np.random.poisson(30, 2),
+        np.random.poisson(30, 3),
+    ])
+    repeat = ets.stops - ets.starts
+
+    weights = np.ones(len(ets))
+    weights = np.repeat(weights, repeat, axis=0)
+    pileup = np.random.poisson(50, len(ets))
+    pileup = np.repeat(pileup, repeat, axis=0)
+    # expand pileup to size ets
+    assert len(pileup) == len(ets.content)
+    # hist.fill(pileup, ets.content, bh.weight(weights))
+    hist(pileup, ets.content)

From 90c67baf0d9e979dbf6142010e372a47d80735cd Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Tue, 9 Jul 2019 10:45:23 +0100
Subject: [PATCH 05/30] added aghast and boost_histogram to requirements

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 946323ba768..14aa53a4ea7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+aghast
+git+https://github.com/scikit-hep/boost-histogram.git@develop
 numpy
 matplotlib
 pandas==0.23

From 5946a249d3c2cd437515ddeff33517a059ae1294 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Tue, 9 Jul 2019 10:49:19 +0100
Subject: [PATCH 06/30] added draft and first tests for vectorized Histogram
 collection

---
 cmsl1t/collections/__init__.py      |  2 ++
 cmsl1t/collections/vectorized.py    | 28 ++++++++++++++++++++++++++++
 test/collections/test_vectorized.py | 19 +++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 cmsl1t/collections/vectorized.py
 create mode 100644 test/collections/test_vectorized.py

diff --git a/cmsl1t/collections/__init__.py b/cmsl1t/collections/__init__.py
index daab1251d3d..30722f35d3e 100644
--- a/cmsl1t/collections/__init__.py
+++ b/cmsl1t/collections/__init__.py
@@ -5,10 +5,12 @@
 from .by_pileup import HistogramsByPileUpCollection
 from .resolution import ResolutionCollection
 from .efficiency import EfficiencyCollection
+from .vectorized import VectorizedHistCollection
 
 __all__ = [
     'BaseHistCollection',
     'HistogramsByPileUpCollection',
     'ResolutionCollection',
     'EfficiencyCollection',
+    'VectorizedHistCollection',
 ]
diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
new file mode 100644
index 00000000000..58f9ce82b26
--- /dev/null
+++ b/cmsl1t/collections/vectorized.py
@@ -0,0 +1,28 @@
+import numbda
+
+from . import BaseHistCollection
+
+
+@numba.jit(nopython=True)
+def extend(arr1, starts, stops):
+    repeat = stops - starts
+    return np.repeat(arr1, repeat, axis=0)
+
+
+class VectorizedHistCollection(object):
+
+    def __init__(self, innerBins):
+        self._innerBins = innerBins
+        self._innerHist = Hist(100, 0, 100, name='inner')
+
+    def _get_inner_indices(self, values):
+        '''
+            Returns the pileup bin corresponding to the provided pileup value.
+             - bin 0 is underflow
+             - bin len(innerBins) is overflow
+
+            :Example:
+                >>> hists = VectorizedHistCollection(innerBins=[0,10,15,20,30,999])
+                >>> hists._get_inner_indices([1, 11, 1111]) # returns [0, 1, 5]
+        '''
+        return np.digitize(values, self._innerBins)
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
new file mode 100644
index 00000000000..a0eef2bfcb9
--- /dev/null
+++ b/test/collections/test_vectorized.py
@@ -0,0 +1,19 @@
+import pytest
+import numpy as np
+from rootpy.plotting import Hist
+
+from cmsl1t.collections import VectorizedHistCollection
+
+
+@pytest.mark.parametrize(
+    "values,expected",
+    [
+        ([1, 12, 1, 50], [1, 2, 1, 5]),
+        ([1, 11, 1111], [1, 2, 6]),
+        ([-10, 1111, 20], [0, 6, 4]),
+    ])
+def test_inner_index(values, expected):
+    innerBins = np.array([0, 10, 15, 20, 30, 999])
+    coll = VectorizedHistCollection(innerBins)
+
+    np.testing.assert_array_equal(coll._get_inner_indices(values), expected)

From 01e5951d9716f08e2f256d1dfd6d2d5a41c81b56 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Tue, 9 Jul 2019 13:27:53 +0100
Subject: [PATCH 07/30] logger.warn (deprecated) -> logger.warning

---
 cmsl1t/__init__.py                 | 4 ++--
 cmsl1t/collections/by_pileup.py    | 2 +-
 cmsl1t/collections/efficiency.py   | 6 +++---
 cmsl1t/collections/resolution.py   | 4 ++--
 cmsl1t/config.py                   | 2 +-
 cmsl1t/playground/eventreader.py   | 2 +-
 cmsl1t/playground/resolution.py    | 2 +-
 cmsl1t/producers/met.py            | 2 +-
 cmsl1t/producers/met_vectorized.py | 2 +-
 9 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/cmsl1t/__init__.py b/cmsl1t/__init__.py
index 910dca48cd5..8e8605ed081 100644
--- a/cmsl1t/__init__.py
+++ b/cmsl1t/__init__.py
@@ -22,8 +22,8 @@
 logger.addHandler(ch)
 
 if 'PROJECT_ROOT' not in os.environ:
-    logger.warn("Could not find environmental variable 'PROJECT_ROOT'")
-    logger.warn("You should to run 'source setup.sh' first!")
+    logger.warning("Could not find environmental variable 'PROJECT_ROOT'")
+    logger.warning("You should to run 'source setup.sh' first!")
     HERE = path.dirname(path.abspath(__file__))
     PROJECT_ROOT = path.abspath(path.join(HERE, path.pardir))
 else:
diff --git a/cmsl1t/collections/by_pileup.py b/cmsl1t/collections/by_pileup.py
index 42d9f2c15f7..e5c6011182b 100644
--- a/cmsl1t/collections/by_pileup.py
+++ b/cmsl1t/collections/by_pileup.py
@@ -39,7 +39,7 @@ def add(self, hist_name, bins=[]):
                 'No bins specified for histogram {0}'.format(hist_name))
 
         if hist_name in self[self._pileupBins[0]].keys():
-            logger.warn('Histogram {0} already exists!'.format(hist_name))
+            logger.warning('Histogram {0} already exists!'.format(hist_name))
             return
         hist_names = []
         add_name = hist_names.append
diff --git a/cmsl1t/collections/efficiency.py b/cmsl1t/collections/efficiency.py
index f897c7d1f93..e4d05e9b578 100644
--- a/cmsl1t/collections/efficiency.py
+++ b/cmsl1t/collections/efficiency.py
@@ -100,7 +100,7 @@ def add_variable(self, variable, bins, thresholds):
         """
         # TODO: this will no longer work since 1st dimension is pileup
         if variable in self.keys():
-            logger.warn('Variable {0} already exists!')
+            logger.warning('Variable {0} already exists!')
             return
         self._thresholds[variable] = thresholds
         hist_names = []
@@ -123,7 +123,7 @@ def fill(self, hist_name, recoValue, l1Value, w=1.0):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if hist_name not in self._thresholds:
-            logger.warn('No valid current thresholds.')
+            logger.warning('No valid current thresholds.')
         for threshold in self._thresholds[hist_name]:
             h[threshold].fill(recoValue, l1Value, w)
 
@@ -136,7 +136,7 @@ def fill_array(self, hist_name, recoValue, l1Value, w=None):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if hist_name not in self._thresholds:
-            logger.warn('No valid current thresholds.')
+            logger.warning('No valid current thresholds.')
         for threshold in self._thresholds[hist_name]:
             h[threshold].fill_array(recoValue, l1Value, w)
 
diff --git a/cmsl1t/collections/resolution.py b/cmsl1t/collections/resolution.py
index 776ace482d2..acad3b9b8e8 100644
--- a/cmsl1t/collections/resolution.py
+++ b/cmsl1t/collections/resolution.py
@@ -55,7 +55,7 @@ def fill(self, hist_name, x, w=1.0):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if not self._currentRegions:
-            logger.warn(
+            logger.warning(
                 'No valid current regions. Did you set_region_by_eta()?')
         for region in self._currentRegions:
             h[region].fill(x, w)
@@ -63,7 +63,7 @@ def fill(self, hist_name, x, w=1.0):
     def add_variable(self, variable, bins=[]):
         from rootpy.plotting import Hist
         if variable in self.keys():
-            logger.warn('Variable {0} already exists!')
+            logger.warning('Variable {0} already exists!')
             return
         hist_names = []
         add_name = hist_names.append
diff --git a/cmsl1t/config.py b/cmsl1t/config.py
index ac154578ff0..300b936ef05 100644
--- a/cmsl1t/config.py
+++ b/cmsl1t/config.py
@@ -314,7 +314,7 @@ def reduce_scope_for_analyzer(self, analyzer_name):
         forbidden_local_settings = ['name', 'input_files']
         for s in forbidden_local_settings:
             if s in analyzer:
-                logger.warn('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name))
+                logger.warning('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name))
                 analyzer.pop(s)
 
         global_settings = dict(
diff --git a/cmsl1t/playground/eventreader.py b/cmsl1t/playground/eventreader.py
index 9b9480cdc94..987601796db 100644
--- a/cmsl1t/playground/eventreader.py
+++ b/cmsl1t/playground/eventreader.py
@@ -367,7 +367,7 @@ def __init__(self, files, events=-1, load_trees=['event', 'upgrade']):
             try:
                 chain = TreeChain(path, input_files, cache=True, events=events)
             except RuntimeError:
-                logger.warn("Cannot find tree: {0} in input file".format(path))
+                logger.warning("Cannot find tree: {0} in input file".format(path))
                 continue
             self._names.append(name)
             self._trees.append(chain)
diff --git a/cmsl1t/playground/resolution.py b/cmsl1t/playground/resolution.py
index a7969f4d2c1..0b76f0909bd 100644
--- a/cmsl1t/playground/resolution.py
+++ b/cmsl1t/playground/resolution.py
@@ -36,7 +36,7 @@ def add_hist_set(self, prefix, regions=geo.eta_regions, bins=[]):
         for region in regions:
             name = prefix + region
             if name in self._hists:
-                logger.warn('Overwriting existing histogram {0}'.format(name))
+                logger.warning('Overwriting existing histogram {0}'.format(name))
                 del self._hists[name]
             logger.debug('Adding histogram {0}'.format(name))
             self._hists[name] = Hist(bins, name=name)
diff --git a/cmsl1t/producers/met.py b/cmsl1t/producers/met.py
index ea494fc2aa4..658a317db7d 100644
--- a/cmsl1t/producers/met.py
+++ b/cmsl1t/producers/met.py
@@ -76,7 +76,7 @@ def __init__(self, inputs, outputs, **kwargs):
             self._method = Producer.METHODS[params['method']]
         else:
             msg = 'Could not find specified MET method, using default.'
-            logger.warn(msg)
+            logger.warning(msg)
             self._method = Producer.METHODS['default']
 
     def produce(self, event):
diff --git a/cmsl1t/producers/met_vectorized.py b/cmsl1t/producers/met_vectorized.py
index 95ecaf03f38..573d2e45da2 100644
--- a/cmsl1t/producers/met_vectorized.py
+++ b/cmsl1t/producers/met_vectorized.py
@@ -79,7 +79,7 @@ def __init__(self, inputs, outputs, **kwargs):
             self._method = Producer.METHODS[params['method']]
         else:
             msg = 'Could not find specified MET method, using default.'
-            logger.warn(msg)
+            logger.warning(msg)
             self._method = Producer.METHODS['default']
 
     def produce(self, event):

From aecf64c6dd98ee0334826952cef7a59457b82751 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Tue, 9 Jul 2019 13:28:38 +0100
Subject: [PATCH 08/30] fixed "len" for 1-dim collections

---
 cmsl1t/collections/base.py                  | 12 ++++++------
 test/collections/test_baseHistcollection.py | 13 +++++++++++--
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/cmsl1t/collections/base.py b/cmsl1t/collections/base.py
index 479ff9fd8b3..4b355d61199 100644
--- a/cmsl1t/collections/base.py
+++ b/cmsl1t/collections/base.py
@@ -20,10 +20,10 @@
 logger = logging.getLogger(__name__)
 
 
-def create_n_dim_dict(dimensions, initiaValue=0):
+def create_n_dim_dict(dimensions, initialValue=0):
     if dimensions < 1:
-        return initiaValue
-    factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue)
+        return initialValue
+    factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue)
     return defaultdict(factory)
 
 
@@ -40,20 +40,20 @@ def create_n_dim_dict(dimensions, initiaValue=0):
 
 def len_n_dim_dict(dictionary, dimensions):
     if dimensions <= 1:
-        return len(dictionary)
+        return len(dictionary.keys())
     return sum(len_n_dim_dict(v, dimensions - 1)
                for v in six.itervalues(dictionary))
 
 
 class BaseHistCollection(defaultdict):
 
-    def __init__(self, dimensions, initiaValue=0):
+    def __init__(self, dimensions, initialValue=0):
         '''
             For each dimension create a dictionary
         '''
         # TODO: add possibility for different lambda expresions for each
         # dimension. This will allow to have custom dicts in certain dimensions
-        factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue)
+        factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue)
         if sys.version_info[0] < 3:
             defaultdict.__init__(self, factory)
         else:
diff --git a/test/collections/test_baseHistcollection.py b/test/collections/test_baseHistcollection.py
index 783c75555df..1807aa8ab60 100644
--- a/test/collections/test_baseHistcollection.py
+++ b/test/collections/test_baseHistcollection.py
@@ -1,6 +1,8 @@
-from cmsl1t.collections import BaseHistCollection
-import unittest
 from collections import defaultdict
+import pytest
+import unittest
+
+from cmsl1t.collections import BaseHistCollection
 
 
 class TestBaseHistCollection(unittest.TestCase):
@@ -9,6 +11,7 @@ def test_dimensions(self):
         dimensions = 4
         initial_value = 0
         hists = BaseHistCollection(dimensions, initial_value)
+        self.assertEqual(len(hists), 0)
         self.assertIs(type(hists[1]), defaultdict)
         self.assertIs(type(hists[1][2][3][4]), type(initial_value))
         self.assertEqual(hists[1][2][3][4], initial_value)
@@ -23,3 +26,9 @@ def test_dimensions(self):
 
         # length_from_iterator = len(list(six.itervalues(hists)))
         # self.assertEqual(length_from_iterator, 3)
+
+
+@pytest.mark.parametrize("dimensions", [1, 2, 3])
+def test_empty(dimensions):
+    c = BaseHistCollection(dimensions)
+    assert len(c) == 0

From c01c49db2ebbaf6e2f4c55826c51600760e41d3e Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Wed, 10 Jul 2019 16:33:41 +0100
Subject: [PATCH 09/30] added VectorizedHistCollection.add

---
 cmsl1t/__init__.py                  |  6 +++-
 cmsl1t/collections/vectorized.py    | 53 ++++++++++++++++++++++++++---
 test/collections/test_vectorized.py | 29 +++++++++++++---
 3 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/cmsl1t/__init__.py b/cmsl1t/__init__.py
index 8e8605ed081..ee760307d2c 100644
--- a/cmsl1t/__init__.py
+++ b/cmsl1t/__init__.py
@@ -1,7 +1,9 @@
 from __future__ import absolute_import
+import logging
 import os
 from os import path
-import logging
+import sys
+
 
 __version__ = '0.5.1'
 
@@ -28,3 +30,5 @@
     PROJECT_ROOT = path.abspath(path.join(HERE, path.pardir))
 else:
     PROJECT_ROOT = os.environ['PROJECT_ROOT']
+
+PY3 = sys.version_info[0] == 3
diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 58f9ce82b26..ba72a4f9274 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -1,6 +1,14 @@
-import numbda
+from collections import defaultdict
+import logging
+import numba
+import numpy as np
+from rootpy.plotting import Hist
 
 from . import BaseHistCollection
+from ..utils.iterators import pairwise
+from .. import PY3
+
+logger = logging.getLogger(__name__)
 
 
 @numba.jit(nopython=True)
@@ -9,12 +17,24 @@ def extend(arr1, starts, stops):
     return np.repeat(arr1, repeat, axis=0)
 
 
-class VectorizedHistCollection(object):
+class VectorizedHistCollection(BaseHistCollection):
+
+    def __init__(self, innerBins, innerLabel='inner', **kwargs):
+        # if we want to generalize to N dim, innerBins needs to be an array of innerBins
+        dimensions = kwargs.pop('dimensions', 2)
+        if PY3:
+            super(VectorizedHistCollection, self).__init__(dimensions)
+        else:
+            BaseHistCollection.__init__(self, dimensions)
 
-    def __init__(self, innerBins):
         self._innerBins = innerBins
+        self._innerLabel = innerLabel
         self._innerHist = Hist(100, 0, 100, name='inner')
 
+    def __getitem__(self, key):
+        real_key = self._get_inner_indices(key)
+        return defaultdict.__getitem__(self, real_key)
+
     def _get_inner_indices(self, values):
         '''
             Returns the pileup bin corresponding to the provided pileup value.
@@ -23,6 +43,31 @@ def _get_inner_indices(self, values):
 
             :Example:
                 >>> hists = VectorizedHistCollection(innerBins=[0,10,15,20,30,999])
-                >>> hists._get_inner_indices([1, 11, 1111]) # returns [0, 1, 5]
+                >>> hists._get_inner_indices([1, 11, 1111]) # returns [1, 2, 6]
         '''
         return np.digitize(values, self._innerBins)
+
+    def add(self, name, bins, hist_type=Hist):
+
+        bins = np.asarray(bins)
+        if bins.size == 0:
+            logger.error(
+                'No bins specified for histogram {0}'.format(hist_name))
+
+        if name in self[1]:
+            logger.warning('Histogram {0} already exists!'.format(hist_name))
+            return
+        names = []
+        add_name = names.append
+        print(self)
+
+        for i, (lowerEdge, upperEdge) in enumerate(pairwise(self._innerBins)):
+            hist_name = f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+            if i + 1 not in self or hist_name not in self[i + 1]:
+                add_name(hist_name)
+                self[i + 1][hist_name] = Hist(bins, name=hist_name)
+        logger.debug('Created {0} histograms: {1}'.format(
+            len(names), ', '.join(names)))
+
+    def fill(self):
+        pass
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index a0eef2bfcb9..7a11625e557 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -1,3 +1,4 @@
+import awkward
 import pytest
 import numpy as np
 from rootpy.plotting import Hist
@@ -5,6 +6,14 @@
 from cmsl1t.collections import VectorizedHistCollection
 
 
+@pytest.fixture
+def collection():
+    innerBins = np.array([0, 10, 15, 20, 30, 999])
+    coll = VectorizedHistCollection(innerBins)
+    # fill for [35, 90, 120]
+    return coll
+
+
 @pytest.mark.parametrize(
     "values,expected",
     [
@@ -12,8 +21,20 @@
         ([1, 11, 1111], [1, 2, 6]),
         ([-10, 1111, 20], [0, 6, 4]),
     ])
-def test_inner_index(values, expected):
-    innerBins = np.array([0, 10, 15, 20, 30, 999])
-    coll = VectorizedHistCollection(innerBins)
+def test_inner_index(collection, values, expected):
+    np.testing.assert_array_equal(collection._get_inner_indices(values), expected)
+
+
+def test_add(collection):
+    assert len(collection) == 0
+    collection.add('test', bins=[35, 90, 120])
+    assert len(collection) == len(collection._innerBins) - 1
 
-    np.testing.assert_array_equal(coll._get_inner_indices(values), expected)
+# def test_fill(collection):
+#     innerValues = [1, 12, 1, 50]
+#     outerValues = awkward.fromiter([
+#         [60, 50, 40, 30, 20],
+#         [32, 23],
+#         [56, 34, 31],
+#     ])
+#     collection.add('test', bins=[35, 90, 120])

From 15bdaf3341f3fb42415c77c3b3696c95440e3a85 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Wed, 17 Jul 2019 15:03:16 +0100
Subject: [PATCH 10/30] added Bin and Hist proxy objects for histogram
 collection

---
 cmsl1t/collections/vectorized.py    | 63 +++++++++++++++++++++++++----
 test/collections/test_vectorized.py | 26 ++++++++++++
 2 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index ba72a4f9274..d1d440b4665 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -32,8 +32,14 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
         self._innerHist = Hist(100, 0, 100, name='inner')
 
     def __getitem__(self, key):
-        real_key = self._get_inner_indices(key)
-        return defaultdict.__getitem__(self, real_key)
+        if not isinstance(key, (list, np.ndarray, np.generic)):
+            key = np.array([key])
+        real_keys = self._get_inner_indices(key)
+        # Python tries to copy the whole nested default dict ... which is infinite
+        # print(key, real_keys)
+        # return object()
+        return VectorizedBinProxy(self, real_keys)
+        return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()]
 
     def _get_inner_indices(self, values):
         '''
@@ -54,20 +60,61 @@ def add(self, name, bins, hist_type=Hist):
             logger.error(
                 'No bins specified for histogram {0}'.format(hist_name))
 
-        if name in self[1]:
+        if name in defaultdict.__getitem__(self, 1):
             logger.warning('Histogram {0} already exists!'.format(hist_name))
             return
         names = []
         add_name = names.append
-        print(self)
 
         for i, (lowerEdge, upperEdge) in enumerate(pairwise(self._innerBins)):
             hist_name = f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
-            if i + 1 not in self or hist_name not in self[i + 1]:
+            if i + 1 not in self or hist_name not in defaultdict.__getitem__(self, i + 1):
                 add_name(hist_name)
-                self[i + 1][hist_name] = Hist(bins, name=hist_name)
+                defaultdict.__getitem__(self, i + 1)[hist_name] = Hist(bins, name=hist_name)
         logger.debug('Created {0} histograms: {1}'.format(
             len(names), ', '.join(names)))
 
-    def fill(self):
-        pass
+    def fill(self, x, w=None):
+        if w is None:
+            w = np.ones()
+
+
+
+class VectorizedBinProxy(object):
+
+    def __init__(self, collection, inner_indices):
+        self.collection = collection
+        self._inner_indices = inner_indices
+
+    def __getitem__(self, key):
+        # TODO, if key != string, return a BinProxy
+        return VectorizedHistProxy(self, key)
+
+    def __add__(self, other):
+        if self.collection != other.collection:
+            msg = 'Cannot add VectorizedBinProxy for two different collections'
+            logger.error(msg)
+            raise ValueError(msg)
+        self._inner_indices = np.append(self._inner_indices, other._inner_indices)
+        return self
+
+    def __eq__(self, other):
+        if self.collection != other.collection:
+            msg = 'Cannot compare VectorizedBinProxy for two different collections'
+            logger.error(msg)
+            raise ValueError(msg)
+        return self._inner_indices.tolist() == other._inner_indices.tolist()
+
+    def flatten(self):
+        self._inner_indices = np.unique(self._inner_indices)
+        return self
+
+class VectorizedHistProxy(object):
+
+    def __init__(self, bin_proxy, hist_name):
+        self._bin_proxy = bin_proxy.flatten()
+        self._hist_name = hist_name
+
+    def fill(self, x, w=None):
+        if w is None:
+            w = np.ones(x)
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 7a11625e557..2f05e53c857 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -4,6 +4,7 @@
 from rootpy.plotting import Hist
 
 from cmsl1t.collections import VectorizedHistCollection
+from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy
 
 
 @pytest.fixture
@@ -30,6 +31,29 @@ def test_add(collection):
     collection.add('test', bins=[35, 90, 120])
     assert len(collection) == len(collection._innerBins) - 1
 
+
+def test_access(collection):
+    collection.add('test', bins=[35, 90, 120])
+    innerValues = [1, 12, 1, 50]
+    assert collection[innerValues] == collection[1] + collection[12] + collection[1] + collection[50]
+    # assert type(collection[innerValues]) == Hist
+    assert type(collection[innerValues]['test']) == VectorizedHistProxy
+
+
+def test_copy(collection):
+    proxy = VectorizedBinProxy(collection, [1, 12, 1, 50])
+
+
+@pytest.mark.parametrize(
+    "values,expected",
+    [
+        ([1, 12, 1, 50], [1, 12, 50]),
+        ([1, 30, 12, 1, 50], [1, 12, 30, 50]),
+    ])
+def test_bin_proxy_flatten(collection, values, expected):
+    proxy = VectorizedBinProxy(collection, values)
+    assert proxy.flatten()._inner_indices.tolist() == expected
+
 # def test_fill(collection):
 #     innerValues = [1, 12, 1, 50]
 #     outerValues = awkward.fromiter([
@@ -38,3 +62,5 @@ def test_add(collection):
 #         [56, 34, 31],
 #     ])
 #     collection.add('test', bins=[35, 90, 120])
+#     weights = np.ones(len(outerValues.content))
+#     collection[innerValues][hist_name].fill(outerValues, weights)

From f6326ca599aa302387720a3deb4d919c5de45b43 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Wed, 17 Jul 2019 16:53:27 +0100
Subject: [PATCH 11/30] implemented vectorized filling of histograms

---
 cmsl1t/collections/vectorized.py    | 43 +++++++++++++-----
 test/collections/test_vectorized.py | 69 ++++++++++++++++++++++++-----
 2 files changed, 90 insertions(+), 22 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index d1d440b4665..7fb0fb3af79 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -11,7 +11,6 @@
 logger = logging.getLogger(__name__)
 
 
-@numba.jit(nopython=True)
 def extend(arr1, starts, stops):
     repeat = stops - starts
     return np.repeat(arr1, repeat, axis=0)
@@ -21,6 +20,7 @@ class VectorizedHistCollection(BaseHistCollection):
 
     def __init__(self, innerBins, innerLabel='inner', **kwargs):
         # if we want to generalize to N dim, innerBins needs to be an array of innerBins
+        # TODO: last dimension should probably be a normal dictionary
         dimensions = kwargs.pop('dimensions', 2)
         if PY3:
             super(VectorizedHistCollection, self).__init__(dimensions)
@@ -35,11 +35,8 @@ def __getitem__(self, key):
         if not isinstance(key, (list, np.ndarray, np.generic)):
             key = np.array([key])
         real_keys = self._get_inner_indices(key)
-        # Python tries to copy the whole nested default dict ... which is infinite
-        # print(key, real_keys)
-        # return object()
         return VectorizedBinProxy(self, real_keys)
-        return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()]
+        # return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()]
 
     def _get_inner_indices(self, values):
         '''
@@ -66,28 +63,35 @@ def add(self, name, bins, hist_type=Hist):
         names = []
         add_name = names.append
 
-        for i, (lowerEdge, upperEdge) in enumerate(pairwise(self._innerBins)):
-            hist_name = f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+        for i, hist_name in enumerate(self._create_hist_names(name)):
             if i + 1 not in self or hist_name not in defaultdict.__getitem__(self, i + 1):
                 add_name(hist_name)
                 defaultdict.__getitem__(self, i + 1)[hist_name] = Hist(bins, name=hist_name)
         logger.debug('Created {0} histograms: {1}'.format(
             len(names), ', '.join(names)))
 
+    def _create_hist_names(self, name):
+        for i, (lowerEdge, upperEdge) in enumerate(pairwise(self._innerBins)):
+            yield f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+
+    def get_hist_name(self, name, innerIndex):
+        lowerEdge, upperEdge = self._innerBins[innerIndex - 1], self._innerBins[innerIndex]
+        return f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+
     def fill(self, x, w=None):
         if w is None:
             w = np.ones()
 
 
-
 class VectorizedBinProxy(object):
 
     def __init__(self, collection, inner_indices):
         self.collection = collection
         self._inner_indices = inner_indices
+        # self._inner_values = inner_values
 
     def __getitem__(self, key):
-        # TODO, if key != string, return a BinProxy
+        # TODO, if key != string, return a BinProxy of the bin above
         return VectorizedHistProxy(self, key)
 
     def __add__(self, other):
@@ -109,12 +113,29 @@ def flatten(self):
         self._inner_indices = np.unique(self._inner_indices)
         return self
 
+
 class VectorizedHistProxy(object):
 
     def __init__(self, bin_proxy, hist_name):
-        self._bin_proxy = bin_proxy.flatten()
+        self._bin_proxy = bin_proxy
         self._hist_name = hist_name
 
+    def _split_input(self, x, w):
+        inner_indices = self._bin_proxy._inner_indices
+        # TODO: what if x is not jagged
+        inner_indices = extend(inner_indices, x.starts, x.stops)
+        for u in np.unique(inner_indices):
+            mask = inner_indices == u
+            yield u, x.content[mask], w[mask]
+
+    def _get_hist(self, inner_index):
+        hist_name = self._bin_proxy.collection.get_hist_name(self._hist_name, inner_index)
+        return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name]
+
     def fill(self, x, w=None):
         if w is None:
-            w = np.ones(x)
+            # TODO: what if x is not jagged
+            w = np.ones(len(x.content))
+        for i, x_i, w_i in self._split_input(x, w):
+            hist = self._get_hist(i)
+            hist.fill_array(x_i, w_i)
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 2f05e53c857..8ce24536d46 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -4,7 +4,7 @@
 from rootpy.plotting import Hist
 
 from cmsl1t.collections import VectorizedHistCollection
-from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy
+from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy, extend
 
 
 @pytest.fixture
@@ -54,13 +54,60 @@ def test_bin_proxy_flatten(collection, values, expected):
     proxy = VectorizedBinProxy(collection, values)
     assert proxy.flatten()._inner_indices.tolist() == expected
 
-# def test_fill(collection):
-#     innerValues = [1, 12, 1, 50]
-#     outerValues = awkward.fromiter([
-#         [60, 50, 40, 30, 20],
-#         [32, 23],
-#         [56, 34, 31],
-#     ])
-#     collection.add('test', bins=[35, 90, 120])
-#     weights = np.ones(len(outerValues.content))
-#     collection[innerValues][hist_name].fill(outerValues, weights)
+
+@pytest.mark.parametrize(
+    "bins, x, expected",
+    [
+        (
+            np.array([1, 12, 1, 50]),
+            np.array([10, 20, 30, 40]),
+            [np.array([10, 30]), np.array([20]), np.array([40])]
+        ),
+        (
+            np.array([1, 1, 1, 2, 1, 2]),
+            np.array([10, 20, 30, 40, 50, 60]),
+            [np.array([10, 20, 30, 50]), np.array([40, 60])]
+        ),
+    ])
+def test_split(bins, x, expected):
+    unique_bins = np.unique(bins)
+    result = []
+    for b in unique_bins:
+        result.append(x[bins == b])
+    for chunk, exp in zip(result, expected):
+        assert chunk.tolist() == exp.tolist()
+
+
+def test_fill(collection):
+    innerValues = [1, 12, 1, 50]
+    outerValues = awkward.fromiter([
+        [60, 50, 40, 30, 20],
+        [32, 23],
+        [56, 34, 31],
+        [],
+    ])
+    expected = [
+        [4.0, 4.0, 0.0, 0.0],
+        [2.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0],
+    ]
+
+    hist_name = 'test'
+    collection.add(hist_name, bins=[35, 90, 120])
+    weights = np.ones(len(outerValues.content))
+    collection[innerValues][hist_name].fill(outerValues, weights)
+    for i in range(len(np.unique(innerValues))):
+        hist = collection[innerValues][hist_name]._get_hist(i + 1)
+        assert list(hist.y(overflow=True)) == expected[i]
+
+
+def test_extend():
+    innerValues = [1, 12, 1, 50]
+    outerValues = awkward.fromiter([
+        [60, 50, 40, 30, 20],
+        [32, 23],
+        [56, 34, 31],
+        [],
+    ])
+    innerValues = extend(innerValues, outerValues.starts, outerValues.stops)
+    assert len(innerValues) == len(outerValues.content)

From e653da76fdd3ce0ca3574cd0e6e8e81d6e9712b4 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Wed, 17 Jul 2019 17:00:34 +0100
Subject: [PATCH 12/30] added VectorizedHistCollection.inner_fill()

---
 cmsl1t/collections/vectorized.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 7fb0fb3af79..ddc4d41245f 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -78,9 +78,10 @@ def get_hist_name(self, name, innerIndex):
         lowerEdge, upperEdge = self._innerBins[innerIndex - 1], self._innerBins[innerIndex]
         return f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
 
-    def fill(self, x, w=None):
+    def inner_fill(self, x, w=None):
         if w is None:
-            w = np.ones()
+            w = np.ones(len(x))
+        self._innerHist.fill_array(x, w)
 
 
 class VectorizedBinProxy(object):

From 84294368adcfb490ef1052e73bcc2a6e0ee00415 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Wed, 17 Jul 2019 17:02:18 +0100
Subject: [PATCH 13/30] fix pep8 issues

---
 cmsl1t/collections/vectorized.py    | 5 ++---
 test/collections/test_vectorized.py | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index ddc4d41245f..588a7a77c89 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -1,6 +1,5 @@
 from collections import defaultdict
 import logging
-import numba
 import numpy as np
 from rootpy.plotting import Hist
 
@@ -55,10 +54,10 @@ def add(self, name, bins, hist_type=Hist):
         bins = np.asarray(bins)
         if bins.size == 0:
             logger.error(
-                'No bins specified for histogram {0}'.format(hist_name))
+                'No bins specified for histogram {0}'.format(name))
 
         if name in defaultdict.__getitem__(self, 1):
-            logger.warning('Histogram {0} already exists!'.format(hist_name))
+            logger.warning('Histogram {0} already exists!'.format(name))
             return
         names = []
         add_name = names.append
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 8ce24536d46..b61fae8598f 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -1,7 +1,6 @@
 import awkward
 import pytest
 import numpy as np
-from rootpy.plotting import Hist
 
 from cmsl1t.collections import VectorizedHistCollection
 from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy, extend

From 5133fa6ea0d09441582424b32278f5a1e4e0dd1d Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 13:12:22 +0100
Subject: [PATCH 14/30] added filters to demo config

---
 config/demo.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/demo.yaml b/config/demo.yaml
index ceadd10ad1a..70037422dc2 100644
--- a/config/demo.yaml
+++ b/config/demo.yaml
@@ -52,6 +52,7 @@ analysis:
       outputs:
         - l1MetNot28HF
       method: l1MetNot28HF
+  filters: []
 
 output:
   # template is a list here that is joined (os.path.join) in the config parser

From e1e8702581447714dde19afea03477226706aec0 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 14:09:55 +0100
Subject: [PATCH 15/30] improved VectorizedHistCollection to handle numpy
 arrays

---
 cmsl1t/collections/vectorized.py | 33 +++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 588a7a77c89..e41444e2ab2 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -49,8 +49,8 @@ def _get_inner_indices(self, values):
         '''
         return np.digitize(values, self._innerBins)
 
-    def add(self, name, bins, hist_type=Hist):
-
+    def add(self, name, bins, hist_type=Hist, **kwargs):
+        title = kwargs.pop('title', name)
         bins = np.asarray(bins)
         if bins.size == 0:
             logger.error(
@@ -65,7 +65,7 @@ def add(self, name, bins, hist_type=Hist):
         for i, hist_name in enumerate(self._create_hist_names(name)):
             if i + 1 not in self or hist_name not in defaultdict.__getitem__(self, i + 1):
                 add_name(hist_name)
-                defaultdict.__getitem__(self, i + 1)[hist_name] = Hist(bins, name=hist_name)
+                defaultdict.__getitem__(self, i + 1)[hist_name] = hist_type(bins, name=hist_name, title=title)
         logger.debug('Created {0} histograms: {1}'.format(
             len(names), ', '.join(names)))
 
@@ -79,7 +79,7 @@ def get_hist_name(self, name, innerIndex):
 
     def inner_fill(self, x, w=None):
         if w is None:
-            w = np.ones(len(x))
+            w = np.ones(np.size(x))
         self._innerHist.fill_array(x, w)
 
 
@@ -122,20 +122,35 @@ def __init__(self, bin_proxy, hist_name):
 
     def _split_input(self, x, w):
         inner_indices = self._bin_proxy._inner_indices
-        # TODO: what if x is not jagged
-        inner_indices = extend(inner_indices, x.starts, x.stops)
+        content = x
+        if hasattr(x, 'starts'):
+            inner_indices = extend(inner_indices, x.starts, x.stops)
+            content = x.content
+
         for u in np.unique(inner_indices):
             mask = inner_indices == u
-            yield u, x.content[mask], w[mask]
+            if not isinstance(mask, (list, np.ndarray)):
+                mask = np.array([mask])
+            yield u, content[mask], w[mask]
 
     def _get_hist(self, inner_index):
         hist_name = self._bin_proxy.collection.get_hist_name(self._hist_name, inner_index)
         return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name]
 
     def fill(self, x, w=None):
+        if not isinstance(x, (list, np.ndarray)):
+            x = np.array([x])
         if w is None:
-            # TODO: what if x is not jagged
-            w = np.ones(len(x.content))
+            n = np.size(x.content) if hasattr(x, 'content') else np.size(x)
+            w = np.ones(n)
         for i, x_i, w_i in self._split_input(x, w):
             hist = self._get_hist(i)
             hist.fill_array(x_i, w_i)
+
+
+# def split_input():
+#     a = np.array([1, 12, 1, 10, 50, 10])
+#     b = np.array([10, 20, 30, 40, 50, 60])
+#     arg = a.argsort(kind='stable')
+#     offsets, = np.where(np.r_[True, np.diff(a[arg]) > 0])
+#     output = awkward.JaggedArray.fromoffsets(offsets.flatten(), awkward.IndexedArray(arg, b))

From 23d2b11d32ef79268858e07f6a996a2b01f4952c Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 14:47:47 +0100
Subject: [PATCH 16/30] augmented demo analyzer with VectorizedHistCollection

---
 cmsl1t/analyzers/demo_analyzer.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/cmsl1t/analyzers/demo_analyzer.py b/cmsl1t/analyzers/demo_analyzer.py
index a8ff18b0be6..1a0a8a062e8 100644
--- a/cmsl1t/analyzers/demo_analyzer.py
+++ b/cmsl1t/analyzers/demo_analyzer.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from .BaseAnalyzer import BaseAnalyzer
-from cmsl1t.collections import EfficiencyCollection
+from cmsl1t.collections import EfficiencyCollection, VectorizedHistCollection
 
 
 class Analyzer(BaseAnalyzer):
@@ -14,25 +14,30 @@ class Analyzer(BaseAnalyzer):
     def __init__(self, **kwargs):
         super(Analyzer, self).__init__(**kwargs)
 
-        self.met_calcs = dict(
-            RecalcL1EmuMETNot28=dict(
+        self.met_calcs = {
+            self.name + '_' + 'RecalcL1EmuMETNot28': dict(
                 title="Emulated MET, |ieta|<28",
                 attr='l1MetNot28'),
-            RecalcL1EmuMETNot28HF=dict(
+            self.name + '_' + 'RecalcL1EmuMETNot28HF': dict(
                 title="Emulated MET, |ieta|!=28",
                 attr='l1MetNot28HF'),
-        )
+        }
 
     def prepare_for_events(self, reader):
         bins = np.arange(0, 200, 25)
         thresholds = [70, 90, 110]
         puBins = list(range(0, 50, 10)) + [999]
 
+        self.hists = VectorizedHistCollection(innerBins=puBins, innerLabel='pu')
+
         self.efficiencies = EfficiencyCollection(pileupBins=puBins)
         add_met_variable = partial(
             self.efficiencies.add_variable,
             bins=bins, thresholds=thresholds)
         list(map(add_met_variable, self.met_calcs))
+
+        for met, config in self.met_calcs.items():
+            self.hists.add(met, bins=bins, title=config['title'])
         return True
 
     def reload_histograms(self, input_file):
@@ -43,16 +48,19 @@ def reload_histograms(self, input_file):
     def fill_histograms(self, entry, event):
         pileup = event['Vertex_nVtx']
         self.efficiencies.set_pileup(pileup)
+        self.hists.inner_fill(pileup)
 
         offlineMetBE = event.Sums_caloMetBE
         for name, config in self.met_calcs.items():
             onlineMet = event[config['attr']]
             onlineMet = onlineMet.mag
             self.efficiencies.fill_array(name, offlineMetBE, onlineMet)
+            self.hists[pileup][name].fill(offlineMetBE)
         return True
 
     def write_histograms(self):
-        self.efficiencies.to_root(self.get_histogram_filename())
+        self.efficiencies.to_root(self.get_histogram_filename().replace('.root', '_efficiencies.root'))
+        self.hists.to_root(self.get_histogram_filename())
         return True
 
     def make_plots(self):

From 8cde7ef3f3a9ee34480e31f742b134f8d30123f5 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 14:48:29 +0100
Subject: [PATCH 17/30] added random hex suffix to inner histogram name for
 VectorizedHistCollection if no name is given (to avoid name clashes)

---
 cmsl1t/collections/vectorized.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index e41444e2ab2..589d016419f 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -1,6 +1,7 @@
 from collections import defaultdict
 import logging
 import numpy as np
+import random
 from rootpy.plotting import Hist
 
 from . import BaseHistCollection
@@ -21,6 +22,7 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
         # if we want to generalize to N dim, innerBins needs to be an array of innerBins
         # TODO: last dimension should probably be a normal dictionary
         dimensions = kwargs.pop('dimensions', 2)
+        name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10])
         if PY3:
             super(VectorizedHistCollection, self).__init__(dimensions)
         else:
@@ -28,7 +30,7 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
 
         self._innerBins = innerBins
         self._innerLabel = innerLabel
-        self._innerHist = Hist(100, 0, 100, name='inner')
+        self._innerHist = Hist(100, 0, 100, name=innerLabel + '_' + name)
 
     def __getitem__(self, key):
         if not isinstance(key, (list, np.ndarray, np.generic)):
@@ -147,6 +149,7 @@ def fill(self, x, w=None):
             hist = self._get_hist(i)
             hist.fill_array(x_i, w_i)
 
+# class VectorizedEfficiencyProxy(object):
 
 # def split_input():
 #     a = np.array([1, 12, 1, 10, 50, 10])

From ff54270ae9c5e5c36f05c7c347a49a4e57b92ed1 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 14:56:07 +0100
Subject: [PATCH 18/30] VectorizedHistCollection: making sure inner histogram
 is also written to the output file

---
 cmsl1t/collections/vectorized.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 589d016419f..c071660512d 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -7,6 +7,7 @@
 from . import BaseHistCollection
 from ..utils.iterators import pairwise
 from .. import PY3
+from ..io import to_root
 
 logger = logging.getLogger(__name__)
 
@@ -22,7 +23,8 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
         # if we want to generalize to N dim, innerBins needs to be an array of innerBins
         # TODO: last dimension should probably be a normal dictionary
         dimensions = kwargs.pop('dimensions', 2)
-        name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10])
+        self._name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10])
+        self._execute_before_write = kwargs.pop('execute_before_write', [])
         if PY3:
             super(VectorizedHistCollection, self).__init__(dimensions)
         else:
@@ -30,7 +32,7 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
 
         self._innerBins = innerBins
         self._innerLabel = innerLabel
-        self._innerHist = Hist(100, 0, 100, name=innerLabel + '_' + name)
+        self._innerHist = Hist(100, 0, 100, name=innerLabel + '_' + self._name)
 
     def __getitem__(self, key):
         if not isinstance(key, (list, np.ndarray, np.generic)):
@@ -84,6 +86,11 @@ def inner_fill(self, x, w=None):
             w = np.ones(np.size(x))
         self._innerHist.fill_array(x, w)
 
+    def to_root(self, output_file):
+        for func in self._execute_before_write:
+            func(self)
+        to_root([self, self._innerHist], output_file)
+
 
 class VectorizedBinProxy(object):
 

From e643ec92f49f8380ce1616b669b55f2632da392a Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 15:08:08 +0100
Subject: [PATCH 19/30] commented out obsolete test

---
 test/collections/test_vectorized.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index b61fae8598f..90806d9e20d 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -39,8 +39,8 @@ def test_access(collection):
     assert type(collection[innerValues]['test']) == VectorizedHistProxy
 
 
-def test_copy(collection):
-    proxy = VectorizedBinProxy(collection, [1, 12, 1, 50])
+# def test_copy(collection):
+#     proxy = VectorizedBinProxy(collection, [1, 12, 1, 50])
 
 
 @pytest.mark.parametrize(

From 114f53ce00239d864c7b0688be69da8e5900f269 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 16:33:15 +0100
Subject: [PATCH 20/30] CI: removed python 2.7 from tests

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 64dacb89a93..58de9dfc00c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,6 @@ cache:
 language: python
 
 python:
-  - "2.7"
   - "3.6"
 
 env:

From cefda36fb450004f368b4d47bd7ce2bcd80de943 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 17:01:56 +0100
Subject: [PATCH 21/30] fixed boost histogram tests

---
 test/collections/test_boost_histogram.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/collections/test_boost_histogram.py b/test/collections/test_boost_histogram.py
index 183d06d02ea..6b30dd0e176 100644
--- a/test/collections/test_boost_histogram.py
+++ b/test/collections/test_boost_histogram.py
@@ -25,5 +25,7 @@ def test_fill():
     pileup = np.repeat(pileup, repeat, axis=0)
     # expand pileup to size ets
     assert len(pileup) == len(ets.content)
+    # weights are not yet supported
     # hist.fill(pileup, ets.content, bh.weight(weights))
-    hist(pileup, ets.content)
+    hist.fill(pileup, ets.content)
+    # hist(pileup, ets.content)

From 09178f4a377bce14849ad2bdea5f693775db448d Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Thu, 18 Jul 2019 17:26:49 +0100
Subject: [PATCH 22/30] added test for split_input and added
 awkward.JaggedArray check

---
 cmsl1t/collections/vectorized.py    | 31 +++++++++++++++--------------
 test/collections/test_vectorized.py | 25 ++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index c071660512d..e79588ff9d3 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -1,3 +1,4 @@
+import awkward
 from collections import defaultdict
 import logging
 import numpy as np
@@ -17,6 +18,19 @@ def extend(arr1, starts, stops):
     return np.repeat(arr1, repeat, axis=0)
 
 
+def split_input(inner_indices, x, w):
+    content = x
+    if hasattr(x, 'starts'):
+        inner_indices = extend(inner_indices, x.starts, x.stops)
+        content = x.content
+
+    for u in np.unique(inner_indices):
+        mask = inner_indices == u
+        if not isinstance(mask, (list, np.ndarray)):
+            mask = np.array([mask])
+        yield u, content[mask], w[mask]
+
+
 class VectorizedHistCollection(BaseHistCollection):
 
     def __init__(self, innerBins, innerLabel='inner', **kwargs):
@@ -129,30 +143,17 @@ def __init__(self, bin_proxy, hist_name):
         self._bin_proxy = bin_proxy
         self._hist_name = hist_name
 
-    def _split_input(self, x, w):
-        inner_indices = self._bin_proxy._inner_indices
-        content = x
-        if hasattr(x, 'starts'):
-            inner_indices = extend(inner_indices, x.starts, x.stops)
-            content = x.content
-
-        for u in np.unique(inner_indices):
-            mask = inner_indices == u
-            if not isinstance(mask, (list, np.ndarray)):
-                mask = np.array([mask])
-            yield u, content[mask], w[mask]
-
     def _get_hist(self, inner_index):
         hist_name = self._bin_proxy.collection.get_hist_name(self._hist_name, inner_index)
         return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name]
 
     def fill(self, x, w=None):
-        if not isinstance(x, (list, np.ndarray)):
+        if not isinstance(x, (list, np.ndarray, awkward.JaggedArray)):
             x = np.array([x])
         if w is None:
             n = np.size(x.content) if hasattr(x, 'content') else np.size(x)
             w = np.ones(n)
-        for i, x_i, w_i in self._split_input(x, w):
+        for i, x_i, w_i in split_input(self._bin_proxy._inner_indices, x, w):
             hist = self._get_hist(i)
             hist.fill_array(x_i, w_i)
 
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 90806d9e20d..b04445e7516 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 from cmsl1t.collections import VectorizedHistCollection
-from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy, extend
+from cmsl1t.collections.vectorized import VectorizedBinProxy, VectorizedHistProxy, extend, split_input
 
 
 @pytest.fixture
@@ -110,3 +110,26 @@ def test_extend():
     ])
     innerValues = extend(innerValues, outerValues.starts, outerValues.stops)
     assert len(innerValues) == len(outerValues.content)
+
+def test_split_input():
+    innerValues = [1, 12, 1, 50]
+    outerValues = awkward.fromiter([
+        [60, 50, 40, 30, 20],
+        [32, 23],
+        [56, 34, 31],
+        [],
+    ])
+    weights = np.ones(len(outerValues.content))
+
+    expected = [
+        (1, [60, 50, 40, 30, 20, 56, 34, 31], list(np.ones(8))),
+        [12, [32, 23], list(np.ones(2))],
+    ]
+    results = list(split_input(innerValues, outerValues, weights))
+    assert len(results) == len(expected)
+    for r, e in zip(results, expected):
+        i, o, w = r
+        i_e, o_e, w_e = e
+        assert i == i_e
+        assert o.tolist() == o_e
+        assert w.tolist() == w_e

From 88168fdb057289d2612924471b230e185ccf7312 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 19 Jul 2019 10:33:17 +0100
Subject: [PATCH 23/30] removed unused variable in _create_hist_names

---
 cmsl1t/collections/vectorized.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index e79588ff9d3..d337c52f7c4 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -88,7 +88,7 @@ def add(self, name, bins, hist_type=Hist, **kwargs):
             len(names), ', '.join(names)))
 
     def _create_hist_names(self, name):
-        for i, (lowerEdge, upperEdge) in enumerate(pairwise(self._innerBins)):
+        for lowerEdge, upperEdge in pairwise(self._innerBins):
             yield f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
 
     def get_hist_name(self, name, innerIndex):

From 51d44d3be7663276e1bbeb9bbae29448a11a6e33 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 19 Jul 2019 10:34:23 +0100
Subject: [PATCH 24/30] fixed pep8 error in test_vectorized

---
 test/collections/test_vectorized.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index b04445e7516..3005c9141d0 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -111,6 +111,7 @@ def test_extend():
     innerValues = extend(innerValues, outerValues.starts, outerValues.stops)
     assert len(innerValues) == len(outerValues.content)
 
+
 def test_split_input():
     innerValues = [1, 12, 1, 50]
     outerValues = awkward.fromiter([

From 838701e0518196fa7245f0958016c98ee0e02a99 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 11:00:51 +0100
Subject: [PATCH 25/30] removed cmsl1t.PY3 variable

---
 cmsl1t/__init__.py               | 3 ---
 cmsl1t/collections/vectorized.py | 6 +-----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/cmsl1t/__init__.py b/cmsl1t/__init__.py
index ee760307d2c..3cd437f6817 100644
--- a/cmsl1t/__init__.py
+++ b/cmsl1t/__init__.py
@@ -2,7 +2,6 @@
 import logging
 import os
 from os import path
-import sys
 
 
 __version__ = '0.5.1'
@@ -30,5 +29,3 @@
     PROJECT_ROOT = path.abspath(path.join(HERE, path.pardir))
 else:
     PROJECT_ROOT = os.environ['PROJECT_ROOT']
-
-PY3 = sys.version_info[0] == 3
diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index d337c52f7c4..1318ec1af1e 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -7,7 +7,6 @@
 
 from . import BaseHistCollection
 from ..utils.iterators import pairwise
-from .. import PY3
 from ..io import to_root
 
 logger = logging.getLogger(__name__)
@@ -39,10 +38,7 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
         dimensions = kwargs.pop('dimensions', 2)
         self._name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10])
         self._execute_before_write = kwargs.pop('execute_before_write', [])
-        if PY3:
-            super(VectorizedHistCollection, self).__init__(dimensions)
-        else:
-            BaseHistCollection.__init__(self, dimensions)
+        super(VectorizedHistCollection, self).__init__(dimensions)
 
         self._innerBins = innerBins
         self._innerLabel = innerLabel

From 841cea0a92030061e453926892f23954696523e0 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 11:03:31 +0100
Subject: [PATCH 26/30] VectorizedHistCollection: count tuple as a valid
 iterable

---
 cmsl1t/collections/vectorized.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 1318ec1af1e..b145ddc7e89 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -25,8 +25,8 @@ def split_input(inner_indices, x, w):
 
     for u in np.unique(inner_indices):
         mask = inner_indices == u
-        if not isinstance(mask, (list, np.ndarray)):
-            mask = np.array([mask])
+        if not isinstance(mask, (tuple, list, np.ndarray, np.generic)):
+            mask = np.array(mask)
         yield u, content[mask], w[mask]
 
 
@@ -45,8 +45,8 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
         self._innerHist = Hist(100, 0, 100, name=innerLabel + '_' + self._name)
 
     def __getitem__(self, key):
-        if not isinstance(key, (list, np.ndarray, np.generic)):
-            key = np.array([key])
+        if not isinstance(key, (tuple, list, np.ndarray, np.generic)):
+            key = np.array(key)
         real_keys = self._get_inner_indices(key)
         return VectorizedBinProxy(self, real_keys)
         # return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()]
@@ -144,8 +144,8 @@ def _get_hist(self, inner_index):
         return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name]
 
     def fill(self, x, w=None):
-        if not isinstance(x, (list, np.ndarray, awkward.JaggedArray)):
-            x = np.array([x])
+        if not isinstance(x, (tuple, list, np.ndarray, awkward.JaggedArray)):
+            x = np.array(x)
         if w is None:
             n = np.size(x.content) if hasattr(x, 'content') else np.size(x)
             w = np.ones(n)

From da13c487238a0bbfad9c20c5a0846571591230c2 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 11:20:39 +0100
Subject: [PATCH 27/30] VectorizedHistCollection.add -->
 VectorizedHistCollection.insert

---
 cmsl1t/analyzers/demo_analyzer.py   | 2 +-
 cmsl1t/collections/vectorized.py    | 2 +-
 test/collections/test_vectorized.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmsl1t/analyzers/demo_analyzer.py b/cmsl1t/analyzers/demo_analyzer.py
index 1a0a8a062e8..3af964fea69 100644
--- a/cmsl1t/analyzers/demo_analyzer.py
+++ b/cmsl1t/analyzers/demo_analyzer.py
@@ -37,7 +37,7 @@ def prepare_for_events(self, reader):
         list(map(add_met_variable, self.met_calcs))
 
         for met, config in self.met_calcs.items():
-            self.hists.add(met, bins=bins, title=config['title'])
+            self.hists.insert(met, bins=bins, title=config['title'])
         return True
 
     def reload_histograms(self, input_file):
diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index b145ddc7e89..ce1ee07675f 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -63,7 +63,7 @@ def _get_inner_indices(self, values):
         '''
         return np.digitize(values, self._innerBins)
 
-    def add(self, name, bins, hist_type=Hist, **kwargs):
+    def insert(self, name, bins, hist_type=Hist, **kwargs):
         title = kwargs.pop('title', name)
         bins = np.asarray(bins)
         if bins.size == 0:
diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 3005c9141d0..2ca73e169fc 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -27,12 +27,12 @@ def test_inner_index(collection, values, expected):
 
 def test_add(collection):
     assert len(collection) == 0
-    collection.add('test', bins=[35, 90, 120])
+    collection.insert('test', bins=[35, 90, 120])
     assert len(collection) == len(collection._innerBins) - 1
 
 
 def test_access(collection):
-    collection.add('test', bins=[35, 90, 120])
+    collection.insert('test', bins=[35, 90, 120])
     innerValues = [1, 12, 1, 50]
     assert collection[innerValues] == collection[1] + collection[12] + collection[1] + collection[50]
     # assert type(collection[innerValues]) == Hist
@@ -92,7 +92,7 @@ def test_fill(collection):
     ]
 
     hist_name = 'test'
-    collection.add(hist_name, bins=[35, 90, 120])
+    collection.insert(hist_name, bins=[35, 90, 120])
     weights = np.ones(len(outerValues.content))
     collection[innerValues][hist_name].fill(outerValues, weights)
     for i in range(len(np.unique(innerValues))):

From c1c9a7c3c9f685f2b1393f1b517155336c99061c Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 14:31:03 +0100
Subject: [PATCH 28/30] VectorizedHistCollection: replaced defaultdict with
 super() calls & using innerBins for innerHist

---
 cmsl1t/collections/vectorized.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index ce1ee07675f..7a776db00cb 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -42,7 +42,7 @@ def __init__(self, innerBins, innerLabel='inner', **kwargs):
 
         self._innerBins = innerBins
         self._innerLabel = innerLabel
-        self._innerHist = Hist(100, 0, 100, name=innerLabel + '_' + self._name)
+        self._innerHist = Hist(innerBins, name=innerLabel + '_' + self._name)
 
     def __getitem__(self, key):
         if not isinstance(key, (tuple, list, np.ndarray, np.generic)):
@@ -70,16 +70,17 @@ def insert(self, name, bins, hist_type=Hist, **kwargs):
             logger.error(
                 'No bins specified for histogram {0}'.format(name))
 
-        if name in defaultdict.__getitem__(self, 1):
+        if name in super(VectorizedHistCollection, self).__getitem__(1):
             logger.warning('Histogram {0} already exists!'.format(name))
             return
         names = []
         add_name = names.append
 
         for i, hist_name in enumerate(self._create_hist_names(name)):
-            if i + 1 not in self or hist_name not in defaultdict.__getitem__(self, i + 1):
+            __current_slice = super(VectorizedHistCollection, self).__getitem__(i + 1)
+            if i + 1 not in self or hist_name not in __current_slice:
                 add_name(hist_name)
-                defaultdict.__getitem__(self, i + 1)[hist_name] = hist_type(bins, name=hist_name, title=title)
+                __current_slice[hist_name] = hist_type(bins, name=hist_name, title=title)
         logger.debug('Created {0} histograms: {1}'.format(
             len(names), ', '.join(names)))
 

From 23147d36ee1f14814f4d15eb53da3c21ef02ed6e Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 15:17:08 +0100
Subject: [PATCH 29/30] added tests for different types of weights for
 VectorizedHistCollection

---
 test/collections/test_vectorized.py | 62 +++++++++++++++++++----------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/test/collections/test_vectorized.py b/test/collections/test_vectorized.py
index 2ca73e169fc..13e6c17260a 100644
--- a/test/collections/test_vectorized.py
+++ b/test/collections/test_vectorized.py
@@ -7,13 +7,43 @@
 
 
 @pytest.fixture
-def collection():
-    innerBins = np.array([0, 10, 15, 20, 30, 999])
-    coll = VectorizedHistCollection(innerBins)
-    # fill for [35, 90, 120]
+def scalarBins():
+    return [0, 10, 15, 20, 30, 999]
+
+
+@pytest.fixture
+def collection(scalarBins):
+    coll = VectorizedHistCollection(scalarBins)
     return coll
 
 
+@pytest.fixture
+def scalarDistribution():
+    return [1, 12, 1, 50]
+
+
+@pytest.fixture
+def vectorDistribution():
+    return awkward.fromiter([
+        [60, 50, 40, 30, 20],
+        [32, 23],
+        [56, 34, 31],
+        [],
+    ])
+
+
+@pytest.fixture(params=['event_weights', 'vector_weights', 'flat_vector_weights'])
+def weights(vectorDistribution, request):
+    if request.param == 'event_weights':
+        return np.ones(np.size(vectorDistribution))
+    if request.param == 'vector_weights':
+        return awkward.JaggedArray.fromoffsets(
+            (vectorDistribution.starts, vectorDistribution.stops),
+            np.ones(np.size(vectorDistribution.content))
+        )
+    return np.ones(np.size(vectorDistribution.content))
+
+
 @pytest.mark.parametrize(
     "values,expected",
     [
@@ -33,10 +63,10 @@ def test_add(collection):
 
 def test_access(collection):
     collection.insert('test', bins=[35, 90, 120])
-    innerValues = [1, 12, 1, 50]
-    assert collection[innerValues] == collection[1] + collection[12] + collection[1] + collection[50]
+    values = [1, 12, 1, 50]
+    assert collection[values] == collection[1] + collection[12] + collection[1] + collection[50]
     # assert type(collection[innerValues]) == Hist
-    assert type(collection[innerValues]['test']) == VectorizedHistProxy
+    assert type(collection[values]['test']) == VectorizedHistProxy
 
 
 # def test_copy(collection):
@@ -77,26 +107,18 @@ def test_split(bins, x, expected):
         assert chunk.tolist() == exp.tolist()
 
 
-def test_fill(collection):
-    innerValues = [1, 12, 1, 50]
-    outerValues = awkward.fromiter([
-        [60, 50, 40, 30, 20],
-        [32, 23],
-        [56, 34, 31],
-        [],
-    ])
+def test_fill(collection, scalarDistribution, vectorDistribution, weights):
     expected = [
         [4.0, 4.0, 0.0, 0.0],
         [2.0, 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, 0.0],
     ]
-
     hist_name = 'test'
     collection.insert(hist_name, bins=[35, 90, 120])
-    weights = np.ones(len(outerValues.content))
-    collection[innerValues][hist_name].fill(outerValues, weights)
-    for i in range(len(np.unique(innerValues))):
-        hist = collection[innerValues][hist_name]._get_hist(i + 1)
+    # event_weights = np.ones(np.size(vectorDistribution.content))
+    collection[scalarDistribution][hist_name].fill(vectorDistribution, weights)
+    for i in range(len(np.unique(scalarDistribution))):
+        hist = collection[scalarDistribution][hist_name]._get_hist(i + 1)
         assert list(hist.y(overflow=True)) == expected[i]
 
 

From cb28ed4a4a20d2078b3b0f2bfd23e3f493959b17 Mon Sep 17 00:00:00 2001
From: kreczko <lkreczko@googlemail.com>
Date: Fri, 2 Aug 2019 15:21:45 +0100
Subject: [PATCH 30/30] extended VectorizedHistProxy to allow event weights and
 per-object weights

---
 cmsl1t/collections/vectorized.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
index 7a776db00cb..31fea7e29f8 100644
--- a/cmsl1t/collections/vectorized.py
+++ b/cmsl1t/collections/vectorized.py
@@ -19,15 +19,21 @@ def extend(arr1, starts, stops):
 
 def split_input(inner_indices, x, w):
     content = x
+    weights = w
     if hasattr(x, 'starts'):
         inner_indices = extend(inner_indices, x.starts, x.stops)
         content = x.content
+    if hasattr(w, 'starts'):
+        weights = w.content
+
+    if np.size(weights) < np.size(content) and hasattr(x, 'starts'):
+        weights = extend(weights, x.starts, x.stops)
 
     for u in np.unique(inner_indices):
         mask = inner_indices == u
         if not isinstance(mask, (tuple, list, np.ndarray, np.generic)):
             mask = np.array(mask)
-        yield u, content[mask], w[mask]
+        yield u, content[mask], weights[mask]
 
 
 class VectorizedHistCollection(BaseHistCollection):
@@ -147,6 +153,7 @@ def _get_hist(self, inner_index):
     def fill(self, x, w=None):
         if not isinstance(x, (tuple, list, np.ndarray, awkward.JaggedArray)):
             x = np.array(x)
+
         if w is None:
             n = np.size(x.content) if hasattr(x, 'content') else np.size(x)
             w = np.ones(n)