cms-l1t-offline · kreczko · Aug 6, 2019 · Jul 10, 2019 · May 3, 2019 · May 31, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -9,7 +9,6 @@ cache:
 language: python
 
 python:
-  - "2.7"
   - "3.6"
 
 env:

diff --git a/cmsl1t/__init__.py b/cmsl1t/__init__.py
@@ -1,7 +1,8 @@
 from __future__ import absolute_import
+import logging
 import os
 from os import path
-import logging
+
 
 __version__ = '0.5.1'
 
@@ -22,8 +23,8 @@
 logger.addHandler(ch)
 
 if 'PROJECT_ROOT' not in os.environ:
-    logger.warn("Could not find environmental variable 'PROJECT_ROOT'")
-    logger.warn("You should to run 'source setup.sh' first!")
+    logger.warning("Could not find environmental variable 'PROJECT_ROOT'")
+    logger.warning("You should to run 'source setup.sh' first!")
     HERE = path.dirname(path.abspath(__file__))
     PROJECT_ROOT = path.abspath(path.join(HERE, path.pardir))
 else:

diff --git a/cmsl1t/analyzers/demo_analyzer.py b/cmsl1t/analyzers/demo_analyzer.py
@@ -6,33 +6,38 @@
 import numpy as np
 
 from .BaseAnalyzer import BaseAnalyzer
-from cmsl1t.collections import EfficiencyCollection
+from cmsl1t.collections import EfficiencyCollection, VectorizedHistCollection
 
 
 class Analyzer(BaseAnalyzer):
 
     def __init__(self, **kwargs):
         super(Analyzer, self).__init__(**kwargs)
 
-        self.met_calcs = dict(
-            RecalcL1EmuMETNot28=dict(
+        self.met_calcs = {
+            self.name + '_' + 'RecalcL1EmuMETNot28': dict(
                 title="Emulated MET, |ieta|<28",
                 attr='l1MetNot28'),
-            RecalcL1EmuMETNot28HF=dict(
+            self.name + '_' + 'RecalcL1EmuMETNot28HF': dict(
                 title="Emulated MET, |ieta|!=28",
                 attr='l1MetNot28HF'),
-        )
+        }
 
     def prepare_for_events(self, reader):
         bins = np.arange(0, 200, 25)
         thresholds = [70, 90, 110]
         puBins = list(range(0, 50, 10)) + [999]
 
+        self.hists = VectorizedHistCollection(innerBins=puBins, innerLabel='pu')
+
         self.efficiencies = EfficiencyCollection(pileupBins=puBins)
         add_met_variable = partial(
             self.efficiencies.add_variable,
             bins=bins, thresholds=thresholds)
         list(map(add_met_variable, self.met_calcs))
+
+        for met, config in self.met_calcs.items():
+            self.hists.insert(met, bins=bins, title=config['title'])
         return True
 
     def reload_histograms(self, input_file):
@@ -43,16 +48,19 @@ def reload_histograms(self, input_file):
     def fill_histograms(self, entry, event):
         pileup = event['Vertex_nVtx']
         self.efficiencies.set_pileup(pileup)
+        self.hists.inner_fill(pileup)
 
         offlineMetBE = event.Sums_caloMetBE
         for name, config in self.met_calcs.items():
             onlineMet = event[config['attr']]
             onlineMet = onlineMet.mag
             self.efficiencies.fill_array(name, offlineMetBE, onlineMet)
+            self.hists[pileup][name].fill(offlineMetBE)
         return True
 
     def write_histograms(self):
-        self.efficiencies.to_root(self.get_histogram_filename())
+        self.efficiencies.to_root(self.get_histogram_filename().replace('.root', '_efficiencies.root'))
+        self.hists.to_root(self.get_histogram_filename())
         return True
 
     def make_plots(self):

diff --git a/cmsl1t/analyzers/jetMet_analyzer.py b/cmsl1t/analyzers/jetMet_analyzer.py
@@ -119,7 +119,7 @@ def __init__(self, **kwargs):
 
         lumiMuDict = dict()
         run_lumi_csv = os.path.join(cmsl1t.PROJECT_ROOT, 'run_lumi.csv')
-        with open(run_lumi_csv) as runLumiFile:
+        with open(run_lumi_csv, 'rb') as runLumiFile:
             reader = csv.reader(runLumiFile, delimiter=',')
             for line in reader:
                 lumiMuDict[(int(line[1]), int(line[2]))] = float(line[3])
@@ -361,8 +361,8 @@ def fill_histograms(self, entry, event):
         # pileup = self._lumiMu[(event['run'], event['lumi'])]
         pileup = 51
         # print pileup
-        if pileup >= 60 or pileup < 50:
-            return True
+        # if pileup >= 60 or pileup < 50:
+        #    return True
 
         for name in self._sumTypes:
             if 'pfMET' in name and not pfMetFilter(event):

diff --git a/cmsl1t/collections/__init__.py b/cmsl1t/collections/__init__.py
@@ -5,10 +5,12 @@
 from .by_pileup import HistogramsByPileUpCollection
 from .resolution import ResolutionCollection
 from .efficiency import EfficiencyCollection
+from .vectorized import VectorizedHistCollection
 
 __all__ = [
     'BaseHistCollection',
     'HistogramsByPileUpCollection',
     'ResolutionCollection',
     'EfficiencyCollection',
+    'VectorizedHistCollection',
 ]
diff --git a/cmsl1t/collections/base.py b/cmsl1t/collections/base.py
@@ -20,10 +20,10 @@
 logger = logging.getLogger(__name__)
 
 
-def create_n_dim_dict(dimensions, initiaValue=0):
+def create_n_dim_dict(dimensions, initialValue=0):
     if dimensions < 1:
-        return initiaValue
-    factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue)
+        return initialValue
+    factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue)
     return defaultdict(factory)
 
 
@@ -40,20 +40,20 @@ def create_n_dim_dict(dimensions, initiaValue=0):
 
 def len_n_dim_dict(dictionary, dimensions):
     if dimensions <= 1:
-        return len(dictionary)
+        return len(dictionary.keys())
     return sum(len_n_dim_dict(v, dimensions - 1)
                for v in six.itervalues(dictionary))
 
 
 class BaseHistCollection(defaultdict):
 
-    def __init__(self, dimensions, initiaValue=0):
+    def __init__(self, dimensions, initialValue=0):
         '''
             For each dimension create a dictionary
         '''
         # TODO: add possibility for different lambda expresions for each
         # dimension. This will allow to have custom dicts in certain dimensions
-        factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initiaValue=initiaValue)
+        factory = partial(create_n_dim_dict, dimensions=dimensions - 1, initialValue=initialValue)
         if sys.version_info[0] < 3:
             defaultdict.__init__(self, factory)
         else:

diff --git a/cmsl1t/collections/by_pileup.py b/cmsl1t/collections/by_pileup.py
@@ -39,7 +39,7 @@ def add(self, hist_name, bins=[]):
                 'No bins specified for histogram {0}'.format(hist_name))
 
         if hist_name in self[self._pileupBins[0]].keys():
-            logger.warn('Histogram {0} already exists!'.format(hist_name))
+            logger.warning('Histogram {0} already exists!'.format(hist_name))
             return
         hist_names = []
         add_name = hist_names.append

diff --git a/cmsl1t/collections/efficiency.py b/cmsl1t/collections/efficiency.py
@@ -100,7 +100,7 @@ def add_variable(self, variable, bins, thresholds):
         """
         # TODO: this will no longer work since 1st dimension is pileup
         if variable in self.keys():
-            logger.warn('Variable {0} already exists!')
+            logger.warning('Variable {0} already exists!')
             return
         self._thresholds[variable] = thresholds
         hist_names = []
@@ -123,7 +123,7 @@ def fill(self, hist_name, recoValue, l1Value, w=1.0):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if hist_name not in self._thresholds:
-            logger.warn('No valid current thresholds.')
+            logger.warning('No valid current thresholds.')
         for threshold in self._thresholds[hist_name]:
             h[threshold].fill(recoValue, l1Value, w)
 
@@ -136,7 +136,7 @@ def fill_array(self, hist_name, recoValue, l1Value, w=None):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if hist_name not in self._thresholds:
-            logger.warn('No valid current thresholds.')
+            logger.warning('No valid current thresholds.')
         for threshold in self._thresholds[hist_name]:
             h[threshold].fill_array(recoValue, l1Value, w)
 

diff --git a/cmsl1t/collections/resolution.py b/cmsl1t/collections/resolution.py
@@ -55,15 +55,15 @@ def fill(self, hist_name, x, w=1.0):
             logger.error('Histogram {0} does not exist'.format(hist_name))
             return
         if not self._currentRegions:
-            logger.warn(
+            logger.warning(
                 'No valid current regions. Did you set_region_by_eta()?')
         for region in self._currentRegions:
             h[region].fill(x, w)
 
     def add_variable(self, variable, bins=[]):
         from rootpy.plotting import Hist
         if variable in self.keys():
-            logger.warn('Variable {0} already exists!')
+            logger.warning('Variable {0} already exists!')
             return
         hist_names = []
         add_name = hist_names.append

diff --git a/cmsl1t/collections/vectorized.py b/cmsl1t/collections/vectorized.py
@@ -0,0 +1,171 @@
+import awkward
+from collections import defaultdict
+import logging
+import numpy as np
+import random
+from rootpy.plotting import Hist
+
+from . import BaseHistCollection
+from ..utils.iterators import pairwise
+from ..io import to_root
+
+logger = logging.getLogger(__name__)
+
+
+def extend(arr1, starts, stops):
+    '''
+        Repeat every entry of ``arr1`` along axis 0.
+
+        Entry ``i`` is repeated ``stops[i] - starts[i]`` times, so a per-row
+        array can be lined up with the flattened content of a jagged array
+        described by ``starts``/``stops``.
+
+        :Example:
+            >>> extend(np.array([7, 8]), np.array([0, 2]), np.array([2, 5]))
+            array([7, 7, 8, 8, 8])
+    '''
+    counts = stops - starts
+    return np.repeat(arr1, counts, axis=0)
+
+
+def split_input(inner_indices, x, w):
+    '''
+        Group values and weights by their inner bin index.
+
+        If ``x`` exposes ``starts`` (and therefore ``stops``/``content``) it is
+        treated as jagged: its flat ``content`` is used and ``inner_indices``
+        is repeated once per element of each row. The same flattening is
+        applied to ``w`` when it exposes ``starts``. Weights that are still
+        shorter than the flat content of a jagged ``x`` are repeated per row.
+
+        Yields one ``(index, values, weights)`` tuple for every distinct
+        entry of ``inner_indices``.
+    '''
+    jagged = hasattr(x, 'starts')
+    content = x.content if jagged else x
+    if jagged:
+        inner_indices = extend(inner_indices, x.starts, x.stops)
+    weights = w.content if hasattr(w, 'starts') else w
+
+    if jagged and np.size(weights) < np.size(content):
+        weights = extend(weights, x.starts, x.stops)
+
+    array_like = (tuple, list, np.ndarray, np.generic)
+    for index in np.unique(inner_indices):
+        selected = inner_indices == index
+        # a plain Python bool (scalar comparison) has to be wrapped before
+        # it can be used as a mask
+        if not isinstance(selected, array_like):
+            selected = np.array(selected)
+        yield index, content[selected], weights[selected]
+
+
+class VectorizedHistCollection(BaseHistCollection):
+    '''
+        Histogram collection whose first dimension is the index of an
+        "inner" bin (e.g. a pileup bin) and whose second dimension is the
+        histogram name.
+
+        Indexing with ``collection[values]`` does not return the stored
+        dictionaries; it digitizes ``values`` against ``innerBins`` and returns
+        a ``VectorizedBinProxy`` that knows which inner bins to fill.
+    '''
+
+    def __init__(self, innerBins, innerLabel='inner', **kwargs):
+        '''
+            :param innerBins: bin edges of the inner dimension
+            :param innerLabel: label used in histogram names
+            :param kwargs: optional ``dimensions`` (default 2), ``name``
+                (default: 8 random hex characters) and
+                ``execute_before_write`` (callables run in ``to_root``)
+        '''
+        # if we want to generalize to N dim, innerBins needs to be an array of innerBins
+        # TODO: last dimension should probably be a normal dictionary
+        dimensions = kwargs.pop('dimensions', 2)
+        self._name = kwargs.pop('name', str(hex(random.getrandbits(128)))[2:10])
+        self._execute_before_write = kwargs.pop('execute_before_write', [])
+        super(VectorizedHistCollection, self).__init__(dimensions)
+
+        self._innerBins = innerBins
+        self._innerLabel = innerLabel
+        self._innerHist = Hist(innerBins, name=innerLabel + '_' + self._name)
+
+    def __getitem__(self, key):
+        '''
+            Digitize ``key`` (scalar or array-like of inner values) and return
+            a ``VectorizedBinProxy`` for the resulting inner bin indices.
+        '''
+        if not isinstance(key, (tuple, list, np.ndarray, np.generic)):
+            key = np.array(key)
+        real_keys = self._get_inner_indices(key)
+        return VectorizedBinProxy(self, real_keys)
+        # return [defaultdict.__getitem__(self, k) for k in real_keys.tolist()]
+
+    def _get_inner_indices(self, values):
+        '''
+            Returns the inner bin index corresponding to each provided value.
+             - bin 0 is underflow
+             - bin len(innerBins) is overflow
+
+            :Example:
+                >>> hists = VectorizedHistCollection(innerBins=[0,10,15,20,30,999])
+                >>> hists._get_inner_indices([1, 11, 1111]) # returns [1, 2, 6]
+        '''
+        return np.digitize(values, self._innerBins)
+
+    def insert(self, name, bins, hist_type=Hist, **kwargs):
+        '''
+            Create one histogram called ``name`` per inner bin.
+
+            :param name: base name of the histogram
+            :param bins: bin edges passed to ``hist_type``
+            :param hist_type: histogram class to instantiate
+            :param kwargs: optional ``title`` (defaults to ``name``)
+
+            Nothing is created if ``bins`` is empty (an error is logged) or
+            if the histogram already exists (a warning is logged).
+        '''
+        title = kwargs.pop('title', name)
+        bins = np.asarray(bins)
+        if bins.size == 0:
+            logger.error(
+                'No bins specified for histogram {0}'.format(name))
+            # a histogram without bins cannot be booked
+            return
+
+        # Histograms are stored under their full per-bin name, so the
+        # duplicate check has to look for that name, not for the bare ``name``.
+        first_slice = super(VectorizedHistCollection, self).__getitem__(1)
+        first_hist_name = next(self._create_hist_names(name), None)
+        if first_hist_name is not None and first_hist_name in first_slice:
+            logger.warning('Histogram {0} already exists!'.format(name))
+            return
+
+        names = []
+        # inner bin indices start at 1; index 0 is underflow
+        for i, hist_name in enumerate(self._create_hist_names(name), start=1):
+            current_slice = super(VectorizedHistCollection, self).__getitem__(i)
+            if hist_name not in current_slice:
+                names.append(hist_name)
+                current_slice[hist_name] = hist_type(bins, name=hist_name, title=title)
+        logger.debug('Created {0} histograms: {1}'.format(
+            len(names), ', '.join(names)))
+
+    def _create_hist_names(self, name):
+        '''
+            Yield the per-bin histogram name for each pair of consecutive
+            inner bin edges.
+        '''
+        for lowerEdge, upperEdge in pairwise(self._innerBins):
+            yield f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+
+    def get_hist_name(self, name, innerIndex):
+        '''
+            Return the per-bin histogram name for inner bin ``innerIndex``.
+
+            No range check is done: ``innerIndex`` is used directly to index
+            ``innerBins`` (``innerIndex - 1`` and ``innerIndex``).
+        '''
+        lowerEdge, upperEdge = self._innerBins[innerIndex - 1], self._innerBins[innerIndex]
+        return f"{name}_{self._innerLabel}{lowerEdge}To{upperEdge}"
+
+    def inner_fill(self, x, w=None):
+        '''
+            Fill the histogram of the inner variable with ``x``; weights
+            default to 1 for every entry.
+        '''
+        if w is None:
+            w = np.ones(np.size(x))
+        self._innerHist.fill_array(x, w)
+
+    def to_root(self, output_file):
+        '''
+            Run every ``execute_before_write`` callable with this collection,
+            then write the collection and the inner histogram to
+            ``output_file``.
+        '''
+        for func in self._execute_before_write:
+            func(self)
+        to_root([self, self._innerHist], output_file)
+
+
+class VectorizedBinProxy(object):
+    '''
+        Handle on a set of inner bin indices of one collection.
+
+        Indexing the proxy with a histogram name gives a
+        ``VectorizedHistProxy`` that fills the matching histograms.
+    '''
+
+    def __init__(self, collection, inner_indices):
+        self.collection = collection
+        self._inner_indices = inner_indices
+        # self._inner_values = inner_values
+
+    def _check_same_collection(self, other, msg):
+        '''
+            Log ``msg`` and raise ``ValueError`` if ``other`` belongs to a
+            different collection.
+        '''
+        if self.collection != other.collection:
+            logger.error(msg)
+            raise ValueError(msg)
+
+    def __getitem__(self, key):
+        # TODO, if key != string, return a BinProxy of the bin above
+        return VectorizedHistProxy(self, key)
+
+    def __add__(self, other):
+        '''
+            Append the indices of ``other`` to this proxy (in place) and
+            return this proxy.
+        '''
+        self._check_same_collection(
+            other, 'Cannot add VectorizedBinProxy for two different collections')
+        combined = np.append(self._inner_indices, other._inner_indices)
+        self._inner_indices = combined
+        return self
+
+    def __eq__(self, other):
+        '''
+            Two proxies of the same collection are equal if their index lists
+            are equal; different collections raise ``ValueError``.
+        '''
+        self._check_same_collection(
+            other, 'Cannot compare VectorizedBinProxy for two different collections')
+        mine = self._inner_indices.tolist()
+        theirs = other._inner_indices.tolist()
+        return mine == theirs
+
+    def flatten(self):
+        '''
+            Reduce the indices to their sorted unique values (in place) and
+            return this proxy.
+        '''
+        self._inner_indices = np.unique(self._inner_indices)
+        return self
+
+
+class VectorizedHistProxy(object):
+    '''
+        Handle on one named histogram across the inner bins selected by a
+        ``VectorizedBinProxy``.
+    '''
+
+    def __init__(self, bin_proxy, hist_name):
+        self._bin_proxy = bin_proxy
+        self._hist_name = hist_name
+
+    def _get_hist(self, inner_index):
+        '''
+            Look up the histogram stored for ``inner_index`` in the collection.
+        '''
+        hist_name = self._bin_proxy.collection.get_hist_name(self._hist_name, inner_index)
+        # bypass the collection's own __getitem__, which returns a proxy
+        return defaultdict.__getitem__(self._bin_proxy.collection, inner_index)[hist_name]
+
+    def fill(self, x, w=None):
+        '''
+            Fill the histogram of every selected inner bin with the matching
+            part of ``x``.
+
+            :param x: scalar, sequence, numpy array or jagged array of values
+            :param w: optional weights; default is 1 for every value
+        '''
+        # Values are selected with boolean numpy masks later on, which plain
+        # tuples/lists (and scalars) do not support, so convert them first.
+        if not isinstance(x, (np.ndarray, awkward.JaggedArray)):
+            x = np.asarray(x)
+
+        if w is None:
+            n = np.size(x.content) if hasattr(x, 'content') else np.size(x)
+            w = np.ones(n)
+        elif isinstance(w, (tuple, list)):
+            # same reason as for x: weights are masked as well
+            w = np.asarray(w)
+        for i, x_i, w_i in split_input(self._bin_proxy._inner_indices, x, w):
+            hist = self._get_hist(i)
+            hist.fill_array(x_i, w_i)
+
+# class VectorizedEfficiencyProxy(object):
+
+# def split_input():
+#     a = np.array([1, 12, 1, 10, 50, 10])
+#     b = np.array([10, 20, 30, 40, 50, 60])
+#     arg = a.argsort(kind='stable')
+#     offsets, = np.where(np.r_[True, np.diff(a[arg]) > 0])
+#     output = awkward.JaggedArray.fromoffsets(offsets.flatten(), awkward.IndexedArray(arg, b))
diff --git a/cmsl1t/config.py b/cmsl1t/config.py
@@ -314,7 +314,7 @@ def reduce_scope_for_analyzer(self, analyzer_name):
         forbidden_local_settings = ['name', 'input_files']
         for s in forbidden_local_settings:
             if s in analyzer:
-                logger.warn('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name))
+                logger.warning('Setting {0} is forbidden in analysis::analyzers::{1}'.format(s, analyzer_name))
                 analyzer.pop(s)
 
         global_settings = dict(
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,7 +9,6 @@ cache: @@
     language: python
     python:
-      - "2.7"
       - "3.6"
     env:
@@ Expand Down @@