From 03a90c0878533871fe5550c9b976ce66cde511b5 Mon Sep 17 00:00:00 2001 From: Lucas Corcodilos Date: Sat, 10 Apr 2021 20:53:57 -0500 Subject: [PATCH 1/5] Drop Tpt alpha variation and switch to regular eval --- TIMBER/Framework/include/TopPt_weight.h | 57 +++++---------------- TIMBER/Framework/src/TopPt_weight.cc | 68 +++++-------------------- 2 files changed, 26 insertions(+), 99 deletions(-) diff --git a/TIMBER/Framework/include/TopPt_weight.h b/TIMBER/Framework/include/TopPt_weight.h index ab3da25..5383a74 100644 --- a/TIMBER/Framework/include/TopPt_weight.h +++ b/TIMBER/Framework/include/TopPt_weight.h @@ -15,11 +15,9 @@ using namespace ROOT::VecOps; * * \f[ \sqrt{e^{\alpha - \beta \cdot p_{T}^{\textrm{Gen} t}} \cdot e^{\alpha - \beta \cdot p_{T}^{\textrm{Gen} \bar{t}} }} \f]. * - * where \f$\alpha = 0.0615\f$ and \f$\beta = 0.0005\f$. See the alpha() and beta() functions - * to calculate the weights with these parameters varied. - * - * WARNING: You MUST run corr() before alpha() and beta() since these functions - * recycle information derived from corr(). + * where \f$\alpha = 0.0615\f$ and \f$\beta = 0.0005\f$. See the eval() function + * to calculate the weight plus variations of the \f$\beta\f$ parameter. The \f$\alpha\f$ parameter + * is not varied since it would only represent a flat normalization change. * */ class TopPt_weight { @@ -28,48 +26,19 @@ class TopPt_weight { ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1); public: - TopPt_weight(){}; - ~TopPt_weight(){}; - /** - * @brief Calculate the top \f$p_T\f$ reweighting value for \f$t\bar{t}\f$ simulation - * based on doing gen particle matching. The weight is calculated as - * \f[ \sqrt{e^{\alpha - \beta \cdot p_{T}^{\textrm{Gen } t}} \cdot e^{\alpha - \beta \cdot p_{T}^{\textrm{Gen } \bar{t}} }} \f]. - * where \f$\alpha = 0.0615\f$ and \f$\beta = 0.0005\f$. See the alpha() and beta() functions - * to calculate the weights with these parameters varied. - * - * @param GenPart_pdgId NanoAOD branch - * @param GenPart_statusFlags NanoAOD branch - * @param GenPart_vects Vector of ROOT::Math::PtEtaPhiMVectors (create through hardware::TLvector) - * @param jet0 - * @param jet1 - * @return RVec Will only be length 1. Stored as vector to satisfy TIMBER Correction() requirements - */ - RVec corr(RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, - ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1); /** - * @brief Calculate variations of top \f$p_T\f$ weight by varying the \f$\alpha\f$ parameter. - * The amount of variation can be changed via the scale arguement which is a - * percent change on \f$\alpha\f$. The output is the weight calculated with the variation - * divided by the nominal value. When using MakeWeightCols(), the nominal will be multiplied - * by this variation to recover the total weight. + * @brief Construct a new TopPt_weight object. No arguments. * - * @param GenPart_pdgId NanoAOD branch - * @param GenPart_statusFlags NanoAOD branch - * @param GenPart_vects Vector of ROOT::Math::PtEtaPhiMVectors (create through hardware::TLvector) - * @param jet0 - * @param jet1 - * @param scale Percent variation on \f$\alpha\f$ parameter. - * @return RVec {up, down} variations of the top \f$p_T\f$ reweighting value divided by the nominal weight. 
*/ - RVec alpha( - RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, - ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1, float scale = 0.5); + TopPt_weight(); + ~TopPt_weight(){}; /** - * @brief Calculate variations of the top \f$p_T\f$ weight by varying the \f$\beta\f$ parameter. + * @brief Calculate the top \f$p_T\f$ reweighting value for \f$t\bar{t}\f$ simulation + * based on doing gen particle matching. Additionally, calculate variations of the top + * \f$p_T\f$ weight by varying the \f$\beta\f$ parameter. * The amount of variation can be changed via the scale arguement which is a - * percent change on \f$\beta\f$. The output is the weight calculated with the variation - * divided by the nominal value. When using MakeWeightCols(), the nominal will be multiplied - * by this variation to recover the total weight. + * percent change on \f$\beta\f$. There is no corresponding function for \f$\alpha\f$ + * because the effect is only a flat normalization change. * * @param GenPart_pdgId NanoAOD branch * @param GenPart_statusFlags NanoAOD branch @@ -77,9 +46,9 @@ class TopPt_weight { * @param jet0 * @param jet1 * @param scale Percent variation on \f$\beta\f$ parameter. - * @return RVec {up, down} variations of the top \f$p_T\f$ reweighting value divided by the nominal weight. + * @return RVec {nom, up, down} variations of the top \f$p_T\f$ reweighting value (absolute). */ - RVec beta( + RVec eval( RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1, float scale = 0.5); }; diff --git a/TIMBER/Framework/src/TopPt_weight.cc b/TIMBER/Framework/src/TopPt_weight.cc index aba0be4..fda442d 100644 --- a/TIMBER/Framework/src/TopPt_weight.cc +++ b/TIMBER/Framework/src/TopPt_weight.cc @@ -1,5 +1,7 @@ #include "../include/TopPt_weight.h" +TopPt_weight::TopPt_weight(){}; + std::vector TopPt_weight::matchingGenPt( RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1){ @@ -24,55 +26,7 @@ std::vector TopPt_weight::matchingGenPt( return {genTPt,genTbarPt}; } -RVec TopPt_weight::corr( - RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, - ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1) { - - std::vector matched = matchingGenPt(GenPart_pdgId, GenPart_statusFlags, - GenPart_vects, jet0, jet1); - float genTPt = matched[0]; - float genTbarPt = matched[1]; - - float wTPt = 1.0; - if (genTPt > 0){ - wTPt = exp(0.0615 - 0.0005*genTPt); - } - - float wTbarPt = 1.0; - if (genTbarPt > 0){ - wTbarPt = exp(0.0615 - 0.0005*genTbarPt); - } - - return {sqrt(wTPt*wTbarPt)}; -} - -RVec TopPt_weight::alpha( - RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, - ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1, float scale){ - - std::vector matched = matchingGenPt(GenPart_pdgId, GenPart_statusFlags, - GenPart_vects, jet0, jet1); - float genTPt = matched[0]; - float genTbarPt = matched[1]; - - float wTPt_up = 1.0; - float wTPt_down = 1.0; - if (genTPt > 0){ - wTPt_up = exp((1+scale)*0.0615 - 0.0005*genTPt) / exp(0.0615 - 0.0005*genTPt); - wTPt_down = exp((1-scale)*0.0615 - 0.0005*genTPt) / exp(0.0615 - 0.0005*genTPt); - } - - float wTbarPt_up = 1.0; - float wTbarPt_down = 1.0; - if (genTbarPt > 0){ - wTbarPt_up = exp((1+scale)*0.0615 - 0.0005*genTbarPt) / exp(0.0615 - 0.0005*genTbarPt); - wTbarPt_down = exp((1-scale)*0.0615 - 0.0005*genTbarPt) / exp(0.0615 - 
0.0005*genTbarPt); - } - - return {sqrt(wTPt_up*wTbarPt_up),sqrt(wTPt_down*wTbarPt_down)}; -} - -RVec TopPt_weight::beta( +RVec TopPt_weight::eval( RVec GenPart_pdgId, RVec GenPart_statusFlags, RVec GenPart_vects, ROOT::Math::PtEtaPhiMVector jet0, ROOT::Math::PtEtaPhiMVector jet1, float scale){ @@ -81,19 +35,23 @@ RVec TopPt_weight::beta( float genTPt = matched[0]; float genTbarPt = matched[1]; + float wTPt = 1.0; float wTPt_up = 1.0; float wTPt_down = 1.0; if (genTPt > 0){ - wTPt_up = exp(0.0615 - (1+scale)*0.0005*genTPt) / exp(0.0615 - 0.0005*genTPt); - wTPt_down = exp(0.0615 - (1-scale)*0.0005*genTPt) / exp(0.0615 - 0.0005*genTPt); + wTPt = exp(0.0615 - 0.0005*genTPt); + wTPt_up = exp((1+scale)*0.0615 - 0.0005*genTPt); + wTPt_down = exp((1-scale)*0.0615 - 0.0005*genTPt); } + float wTbarPt = 1.0; float wTbarPt_up = 1.0; float wTbarPt_down = 1.0; - if (genTbarPt > 0){ - wTbarPt_up = exp(0.0615 - (1+scale)*0.0005*genTbarPt) / exp(0.0615 - 0.0005*genTbarPt); - wTbarPt_down = exp(0.0615 - (1-scale)*0.0005*genTbarPt) / exp(0.0615 - 0.0005*genTbarPt); + if (genTbarPt > 0){ + wTbarPt = exp(0.0615 - 0.0005*genTbarPt); + wTbarPt_up = exp((1+scale)*0.0615 - 0.0005*genTbarPt); + wTbarPt_down = exp((1-scale)*0.0615 - 0.0005*genTbarPt); } - return {sqrt(wTPt_up*wTbarPt_up),sqrt(wTPt_down*wTbarPt_down)}; + return {sqrt(wTPt*wTbarPt),sqrt(wTPt_up*wTbarPt_up),sqrt(wTPt_down*wTbarPt_down)}; } \ No newline at end of file From d72d7063cf91a76c2bb1557933b15efd6d107652 Mon Sep 17 00:00:00 2001 From: Lucas Corcodilos Date: Sun, 11 Apr 2021 15:11:19 -0500 Subject: [PATCH 2/5] Analyzer.py: double to single underscore --- TIMBER/Analyzer.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index 5b07534..821cbc9 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -94,20 +94,20 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs", createAll super(analyzer, self).__init__() self.fileName = fileName - self.__eventsTreeName = eventsTreeName + self._eventsTreeName = eventsTreeName self.silent = False # Setup TChains for multiple or single file - self.__eventsChain = ROOT.TChain(self.__eventsTreeName) + self._eventsChain = ROOT.TChain(self._eventsTreeName) self.RunChain = ROOT.TChain(runTreeName) if isinstance(self.fileName,list): for f in self.fileName: - self.__addFile(f) + self._addFile(f) else: - self.__addFile(self.fileName) + self._addFile(self.fileName) # Make base RDataFrame - BaseDataFrame = ROOT.RDataFrame(self.__eventsChain) + BaseDataFrame = ROOT.RDataFrame(self._eventsChain) self.BaseNode = Node('base',BaseDataFrame) self.BaseNode.children = [] # protect against memory issue when running over multiple sets in one script self.AllNodes = [self.BaseNode] @@ -133,7 +133,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs", createAll # Get LHAID from LHEPdfWeights branch self.lhaid = "-1" if not self.isData: - pdfbranch = self.__eventsChain.GetBranch("LHEPdfWeight") + pdfbranch = self._eventsChain.GetBranch("LHEPdfWeight") if pdfbranch != None: branch_title = pdfbranch.GetTitle() if branch_title != '': @@ -164,7 +164,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs", createAll if f.split('/')[-1] in skipHeaders: continue CompileCpp('#include "%s"\n'%f) - def __addFile(self,f): + def _addFile(self,f): '''Add file to TChains being tracked. 
Args: @@ -173,13 +173,13 @@ def __addFile(self,f): if f.endswith(".root"): if 'root://' not in f and f.startswith('/store/'): f='root://cms-xrd-global.cern.ch/'+f - self.__eventsChain.Add(f) + self._eventsChain.Add(f) self.RunChain.Add(f) elif f.endswith(".txt"): txt_file = open(f,"r") for l in txt_file.readlines(): thisfile = l.strip() - self.__addFile(thisfile) + self._addFile(thisfile) else: raise Exception("File name extension not supported. Please provide a single or list of .root files or a .txt file with a line-separated list of .root files to chain together.") @@ -190,7 +190,7 @@ def Close(self): None ''' self.BaseNode.Close() - self.__eventsChain.Reset() + self._eventsChain.Reset() def __str__(self): '''Call with `print()` to print a nicely formatted description @@ -751,7 +751,7 @@ def AddCorrections(self,correctionList,node=None): return self.SetActiveNode(newNode) - def __checkCorrections(self,node,correctionNames,dropList): + def _checkCorrections(self,node,correctionNames,dropList): '''Starting at the provided node, will scale up the tree, grabbing all corrections added along the way. This ensures corrections from other forks of the analyzer tree are not @@ -829,7 +829,7 @@ def MakeWeightCols(self,name='',node=None,correctionNames=None,dropList=[],corre if name != '': namemod = '_'+name else: namemod = '' - correctionsToApply = self.__checkCorrections(node,correctionNames,dropList) + correctionsToApply = self._checkCorrections(node,correctionNames,dropList) # Build nominal weight first (only "weight", no "uncert") weights = {'nominal':''} @@ -1078,7 +1078,7 @@ def CalibrateVars(self,varCalibDict,evalArgs,newCollectionName,variationsFlag=Tr ''' if node == None: node = self.ActiveNode # Type checking and create calibration branches - newNode = self.__checkCalibrations(node,varCalibDict,evalArgs) + newNode = self._checkCalibrations(node,varCalibDict,evalArgs) # Create the product of weights new_columns = OrderedDict() @@ -1111,7 +1111,7 @@ def CalibrateVars(self,varCalibDict,evalArgs,newCollectionName,variationsFlag=Tr return self.SetActiveNode(newNode) - def __checkCalibrations(self,node,varCalibDict,evalArgs): + def _checkCalibrations(self,node,varCalibDict,evalArgs): newNode = node # Type checking if not isinstance(node,Node): raise TypeError('CalibrateVar() does not support argument of type %s for node. 
Please provide a Node.'%(type(node))) @@ -1825,7 +1825,6 @@ def __init__(self,name,script,constructor=[],mainFunc='eval',columnList=None,isC self._constructor = constructor self._objectName = self.name self._call = None - # self.__funcNames = self.__funcInfo.keys() if not isClone: if not self._mainFunc.endswith(mainFunc): From b8c3c40e5abb7c9b14d9589c3e438528bb2c69b4 Mon Sep 17 00:00:00 2001 From: Lucas Corcodilos Date: Sun, 11 Apr 2021 16:12:58 -0500 Subject: [PATCH 3/5] Switch to CollectionOrganizer --- TIMBER/Analyzer.py | 57 ++--------- TIMBER/CollectionOrganizer.py | 187 ++++++++++++++++++++++++++++++++++ test/test_Analyzer.py | 7 ++ 3 files changed, 203 insertions(+), 48 deletions(-) create mode 100644 TIMBER/CollectionOrganizer.py diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index 821cbc9..a3fea3f 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -4,6 +4,7 @@ """ +from TIMBER.CollectionOrganizer import CollectionOrganizer from TIMBER.Utilities.CollectionGen import BuildCollectionDict, GetKeyValForBranch, StructDef, StructObj from TIMBER.Tools.Common import GetHistBinningTuple, CompileCpp, ConcatCols, GetStandardFlags, ExecuteCmd from clang import cindex @@ -151,10 +152,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs", createAll self.ActiveNode = self.BaseNode # Auto create collections - self.__collectionDict = BuildCollectionDict(BaseDataFrame) - self.__builtCollections = [] - if createAllCollections: - self.__createAllCollections(silent=True) + self._collectionOrg = CollectionOrganizer(BaseDataFrame) skipHeaders = [] if 'CMSSW_BASE' not in os.environ.keys(): @@ -253,7 +251,7 @@ def GetCollectionNames(self): Returns: list(str): Collection names. ''' - return self.__collectionDict.keys() + return self._collectionOrg.collectionDict.keys() def SetActiveNode(self,node): '''Sets the active node. 
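For reference, the user-facing bookkeeping is meant to be unchanged by swapping BuildCollectionDict for CollectionOrganizer. A minimal sketch of what GetCollectionNames() should now report, reusing the test sample already used by test_Analyzer.py:

from TIMBER.Analyzer import analyzer

# NanoAOD-like test input from the test suite; any file with Jet_* branches
# and an nJet counter would group the same way.
a = analyzer('examples/GluGluToHToTauTau.root')

# GetCollectionNames() now reads the CollectionOrganizer built from the base RDataFrame,
# so branches such as Jet_pt and Jet_eta surface under the single collection key 'Jet'.
print('Jet' in a.GetCollectionNames())   # expected to print True (see test_CollectionGroup)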
@@ -405,12 +403,12 @@ def Cut(self,name,cuts,node=None,nodetype=None): if isinstance(cuts,CutGroup): for c in cuts.keys(): cut = cuts[c] - newNode = self.__collectionDefCheck(cut, newNode) + newNode = self._collectionOrg.CollectionDefCheck(cut, newNode) newNode = newNode.Cut(c,cut,nodetype=nodetype,silent=self.silent) newNode.name = cuts.name+'__'+c self.TrackNode(newNode) elif isinstance(cuts,str): - newNode = self.__collectionDefCheck(cuts, newNode) + newNode = self._collectionOrg.CollectionDefCheck(cuts, newNode) newNode = newNode.Cut(name,cuts,nodetype=nodetype,silent=self.silent) self.TrackNode(newNode) else: @@ -441,29 +439,22 @@ def Define(self,name,variables,node=None,nodetype=None): if isinstance(variables,VarGroup): for v in variables.keys(): var = variables[v] - newNode = self.__collectionDefCheck(var, newNode) + newNode = self._collectionOrg.CollectionDefCheck(var, newNode) newNode = newNode.Define(v,var,nodetype=nodetype,silent=self.silent) + self._collectionOrg.AddBranch(v) newNode.name = variables.name+'__'+v self.TrackNode(newNode) # newNode.name = variables.name elif isinstance(variables,str): - newNode = self.__collectionDefCheck(variables, newNode) + newNode = self._collectionOrg.CollectionDefCheck(variables, newNode) newNode = newNode.Define(name,variables,nodetype=nodetype,silent=self.silent) + self._collectionOrg.AddBranch(name, str(newNode.DataFrame.GetColumnType(name))) self.TrackNode(newNode) else: raise TypeError("Second argument to Define method must be a string of a single var or of type VarGroup (which provides an OrderedDict).") return self.SetActiveNode(newNode) - def __collectionDefCheck(self, action_str, node): - newNode = node - for c in self.__collectionDict.keys(): - if (c+'s' not in self.__builtCollections) and re.search(r"\b" + re.escape(c+'s') + r"\b", action_str): - print ('MAKING %ss for %s'%(c,action_str)) - newNode = self.__createCollection(c,self.__collectionDict[c],silent=True,node=newNode) - self.__builtCollections.append(c+'s') - return self.SetActiveNode(newNode) - # Applies a bunch of action groups (cut or var) in one-shot in the order they are given def Apply(self,actionGroupList,node=None,trackEach=True): '''Applies a single CutGroup/VarGroup or an ordered list of Groups to the provided node or the #ActiveNode by default. 
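To make the intent of CollectionDefCheck concrete, here is a short sketch of the struct-style access it enables, mirroring the new test_CollectionStruct test added later in this patch (same test sample as above):

from TIMBER.Analyzer import analyzer

a = analyzer('examples/GluGluToHToTauTau.root')

# Writing 'Jets[0].pt' (plural, struct-style) instead of 'Jet_pt[0]' makes CollectionDefCheck
# compile a JetStruct vector and define the 'Jets' column on the fly before booking the Filter.
a.Cut('structCut', 'Jets[0].pt > 0')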
@@ -550,12 +541,6 @@ def SubCollection(self,name,basecoll,condition,skip=[]): else: if condition != '': self.Define(replacementName,'%s[%s]'%(b,name+'_idx'),nodetype='SubCollDefine') else: self.Define(replacementName,b,nodetype='SubCollDefine') - - branches_to_track = [] - for v in collBranches: - v_with_type = GetKeyValForBranch(self.DataFrame, v)[1] - if v_with_type != '': branches_to_track.append(v_with_type) - self.__trackNewCollection(name,branches_to_track) return self.ActiveNode @@ -630,30 +615,6 @@ def MergeCollections(self,name,collectionNames): self.Define('n'+name,'+'.join(['n'+n for n in collectionNames]),nodetype='MergeDefine') - self.__trackNewCollection(name,[GetKeyValForBranch(self.DataFrame,collectionNames[0]+'_'+v)[1] for v in vars_to_make]) - - def __trackNewCollection(self,name,branches): - self.__collectionDict[name] = [] - for b in branches: - self.__collectionDict[name].append(b) - - def __createCollection(self,collection,attributes,silent=True,node=None): - init_silent = self.silent - self.silent = silent - if collection+'s' not in self.__builtCollections: - self.__builtCollections.append(collection+'s') - CompileCpp(StructDef(collection,attributes)) - newNode = self.Define(collection+'s', StructObj(collection,attributes),node) - else: - raise RuntimeError('Collections `%s` already built.'%(collection+'s')) - self.silent = init_silent - return self.SetActiveNode(newNode) - - def __createAllCollections(self,silent=True): - collDict = BuildCollectionDict(self.BaseNode.DataFrame) - for c in collDict.keys(): - self.__createCollection(c,collDict[c],silent) - def CommonVars(self,collections): '''Find the common variables between collections. diff --git a/TIMBER/CollectionOrganizer.py b/TIMBER/CollectionOrganizer.py new file mode 100644 index 0000000..2e35a2d --- /dev/null +++ b/TIMBER/CollectionOrganizer.py @@ -0,0 +1,187 @@ +from TIMBER.Tools.Common import CompileCpp +import re +''' +- Check for current handling of __collectionDict in Analyzer and replace with this +- Replace instances of BuildCollectionDict and GetKeyValForBranch +- Write ProcessLine +- Implement ProcessLine in analyzer +''' +class CollectionOrganizer: + def __init__(self, rdf): + self.baseBranches = [str(b) for b in rdf.GetColumnNames()] + self.generateFromRDF(rdf) + self.builtCollections = [] + + def generateFromRDF(self, rdf): + self.collectionDict = {} + self.otherBranches = {} + + for b in self.baseBranches: + self.AddBranch(b,rdf.GetColumnType(b)) + + def parsetype(self, t): + if not t.startswith('ROOT::VecOps::RVec<'): + collType = False + else: + collType = str(t).replace('ROOT::VecOps::RVec<','') + if collType.endswith('>'): + collType = collType[:-1] + collType += '&' + if 'Bool_t' in collType: + collType = collType.replace('Bool_t&','std::_Bit_reference') + + if collType == '&': + collType = '' + + return collType + + def AddCollection(self, c): + if c not in self.collectionDict.keys(): + self.collectionDict[c] = {'alias': False} + + def GetCollectionAttributes(self, c): + return [c for c in self.collectionDict[c] if c != 'alias'] + + def AddBranch(self, b, btype=''): + collname = b.split('_')[0] + varname = '_'.join(b.split('_')[1:]) + typeStr = self.parsetype(btype) + + if typeStr == False or varname == '' or 'n'+collname not in self.baseBranches: + self.otherBranches[b] = { + 'type': typeStr, + 'alias': False + } + elif varname != '': + self.AddCollection(collname) + self.collectionDict[collname][varname] = { + 'type': typeStr, + 'alias': False + } + + def Alias(self, alias, name): + # 
Name is either in otherBranches, is a collection name, or is a full name _ + if name in self.otherBranches.keys(): + self.otherBranches[name]['alias'] = alias + elif name in self.collectionDict.keys(): + self.collectionDict[name]['alias'] = alias + else: + collname = name.split('_')[0] + varname = '_'.join(name.split('_')[1:]) + if collname in self.collectionDict.keys(): + if varname in self.collectionDict[collname].keys(): + self.collectionDict[collname][varname]['alias'] = alias + else: + raise ValueError('Cannot add alias `%s` because attribute `%s` does not exist in collection `%s`'%(alias,varname,collname)) + else: + raise ValueError('Cannot add alias `%s` because collection `%s` does not exist'%(alias,collname)) + + def ProcessLine(self, line): + return line + + def BuildCppCollection(self,collection,node,silent=True): + newNode = node + attributes = [] + for aname in self.GetCollectionAttributes(collection): + attributes.append('%s %s'%(self.collectionDict[collection][aname]['type'], aname)) + + if collection+'s' not in self.builtCollections: + self.builtCollections.append(collection+'s') + CompileCpp(StructDef(collection,attributes)) + newNode = newNode.Define(collection+'s', StructObj(collection,attributes),silent=silent) + else: + raise RuntimeError('Collections `%s` already built.'%(collection+'s')) + + return newNode + + def CollectionDefCheck(self, action_str, node): + newNode = node + for c in self.collectionDict.keys(): + if re.search(r"\b" + re.escape(c+'s') + r"\b", action_str) and (c+'s' not in self.builtCollections): + print ('MAKING %ss for %s'%(c,action_str)) + newNode = self.BuildCppCollection(c,newNode,silent=True) + return newNode + +# +# Utilities already written +# +# def BuildCollectionDict(rdf, includeType = True): +# '''Turns a list of branches from an RDataFrame into a dictionary of collections. + +# Args: +# rdf ([str]): RDataFrame from which to get the branches and types. +# includeType (bool, optional): Include the type in the stored variable name (prepended). Defaults to True. + +# Returns: +# dict: Dictionary where key is collection name and value is list of variable names. 
+# ''' +# collections = {} +# lone_branch = [] + +# branch_names = [str(b) for b in rdf.GetColumnNames()] +# for b in branch_names: +# collname, varname = GetKeyValForBranch(rdf, b, includeType) +# if varname == '' or 'n'+collname not in branch_names: +# lone_branch.append(collname) +# if collname not in collections.keys(): +# collections[collname] = [] +# collections[collname].append(varname) + +# return collections,lone_branch + +# def GetKeyValForBranch(rdf, bname, includeType=True): +# collname = bname.split('_')[0] +# varname = '_'.join(bname.split('_')[1:]) +# out = (collname, '') + +# branch_names = [str(b) for b in rdf.GetColumnNames()] +# if varname == '' or 'n'+collname not in branch_names: +# pass +# elif varname != '': +# collType = str(rdf.GetColumnType(bname)).replace('ROOT::VecOps::RVec<','') +# if collType.endswith('>'): collType = collType[:-1] +# collType += '&' +# if 'Bool_t' in collType: collType = collType.replace('Bool_t&','std::_Bit_reference') +# if includeType: +# out = (collname, collType+' '+varname) +# else: +# out = (collname, collType+' '+varname) + +# return out + +def StructDef(collectionName, varList): + out_str = ''' +struct {0}Struct {{ + {1} + {0}Struct({2}) : + {3} {{ + }}; +}}; + ''' + definitions = [] + ctor_args = [] + ctor_assign = [] + for i,v in enumerate(varList): + definitions.append('%s; \n'%v) + ctor_args.append('%s'%v) + ctor_assign.append('%s(%s)'%(v.split(' ')[-1], v.split(' ')[-1])) + + out_str = out_str.format(collectionName, '\t'.join(definitions), ','.join(ctor_args),','.join(ctor_assign)) + return out_str + +def StructObj(collectionName, varList): + out_str = ''' +std::vector<{0}Struct> {0}s; +{0}s.reserve(n{0}); +for (size_t i = 0; i < n{0}; i++) {{ + {0}s.emplace_back({1}); +}} +return {0}s; +''' + attr_assignment_str = '' + print (varList) + for i,v in enumerate(varList): + varname = v.split(' ')[-1] + attr_assignment_str += '{0}_{1}[i],'.format(collectionName, varname) + out_str = out_str.format(collectionName,attr_assignment_str[:-1]) + return out_str diff --git a/test/test_Analyzer.py b/test/test_Analyzer.py index 31c7139..e16a78b 100644 --- a/test/test_Analyzer.py +++ b/test/test_Analyzer.py @@ -97,6 +97,9 @@ def test_GetFlagString(self): assert self.a.GetFlagString(['HLT_IsoMu24','HLT_IsoMu24_eta2p1','NotReal']) == '((HLT_IsoMu24==1) && (HLT_IsoMu24_eta2p1==1))' pass + def test_CollectionStruct(self): + self.a.Cut('structCut','Jets[0].pt > 0') + def test_Groups(): a = analyzer('examples/GluGluToHToTauTau.root') # CompileCpp('TIMBER/Framework/include/common.h') # Compile (via gInterpreter) commonly used c++ code @@ -113,3 +116,7 @@ def test_Groups(): a.Apply([test_vg, test_cg]) rep = a.DataFrame.Report() rep.Print() + +def test_CollectionGroup(): + a = analyzer('examples/GluGluToHToTauTau.root') + assert ('Jet' in a.GetCollectionNames()) From c61ea984723ffdbf945aaa7b30055d99f5b40054 Mon Sep 17 00:00:00 2001 From: Lucas Corcodilos Date: Tue, 13 Apr 2021 08:07:44 -0500 Subject: [PATCH 4/5] Modify CalibratedVars docs --- TIMBER/Analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index a3fea3f..7dd4eac 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -1010,14 +1010,14 @@ def CalibrateVars(self,varCalibDict,evalArgs,newCollectionName,variationsFlag=Tr ``` This will apply the JES and JER calibrations and their four variations (up,down pair for each) to FatJet_pt and FatJet_mass branches - and create a new collection called "CorrectedFatJets" which will be 
ordered by the new pt values. Note that if you want to correct a different + and create a new collection called "CalibratedFatJets" which will be ordered by the new pt values. Note that if you want to correct a different collection (ex. AK4 based Jet collection), you need a separate payload and separate call to CalibrateVars because only one collection can be generated at a time. Also note that in this example, `jes` and `jer` are initialized with the AK8PFPuppi jets in mind. So if you'd like to apply the JES or JER calibrations to AK4 jets, you would also need to define objects like `jesAK4` and `jerAK4`. The calibrations will always be calculated as a seperate column which stores a vector named `__vec` and ordered {nominal, up, down} where "up" and "down" are the absolute weights - (ie. not relative to "nominal"). If you'd just like the weights and do not want them applied to any variable, you can provide + (ie. "up" and "down" are not relative to "nominal"). If you'd just like the weights and do not want them applied to any variable, you can provide an empty dictionary (`{}`) for the varCalibDict argument. This method will set the new active node to the one with the new collection defined. From b245886d130c67d45e220eca116a8c8b6b5d92c0 Mon Sep 17 00:00:00 2001 From: Lucas Corcodilos Date: Tue, 13 Apr 2021 08:08:46 -0500 Subject: [PATCH 5/5] Initial work (not working) --- TIMBER/Analyzer.py | 153 +++++++++++++++++++++++++++++----- TIMBER/CollectionOrganizer.py | 3 +- TIMBER/Tools/AutoJME.py | 26 ++++-- 3 files changed, 154 insertions(+), 28 deletions(-) diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index 7dd4eac..00eb5a2 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -109,9 +109,9 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs", createAll # Make base RDataFrame BaseDataFrame = ROOT.RDataFrame(self._eventsChain) - self.BaseNode = Node('base',BaseDataFrame) + self.BaseNode = Node('base',BaseDataFrame,nodetype='base') self.BaseNode.children = [] # protect against memory issue when running over multiple sets in one script - self.AllNodes = [self.BaseNode] + self.__allNodes = [self.BaseNode] self.Corrections = {} # Check if dealing with data @@ -218,7 +218,17 @@ def DataFrame(self): def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE'): '''@see Node#Snapshot''' - self.ActiveNode.Snapshot(columns,outfilename,treename,lazy,openOption) + if isinstance(self.ActiveNode, Node): + self.ActiveNode.Snapshot(columns,outfilename,treename,lazy,openOption) + elif isinstance(self.ActiveNode, dict): + outfilename = outfilename.replace('.root','') + for i,n in enumerate(self.ActiveNode.keys()): + if i == len(self.ActiveNode.keys())-1: # Last: Be (the provided option) + self.ActiveNode[n].Snapshot(columns,outfilename+'_'+n,treename,lazy,openOption) + else: # Else: Be lazy + self.ActiveNode[n].Snapshot(columns,outfilename+'_'+n,treename,True,openOption) + else: + raise TypeError("analyzer.ActiveNode is not a Node or dict of Nodes.") def SaveRunChain(self,filename,merge=True): '''Save the Run tree (chain of all input files) to filename. @@ -264,7 +274,7 @@ def SetActiveNode(self,node): Returns: Node: New #ActiveNode. ''' - if not isinstance(node,Node): raise ValueError('SetActiveNode() does not support argument of type %s. Please provide a Node.'%(type(node))) + if not isinstance(node,Node) and not isinstance(node,dict): raise ValueError('SetActiveNode() does not support argument of type %s. 
Please provide a Node.'%(type(node))) else: self.ActiveNode = node return self.ActiveNode @@ -285,6 +295,19 @@ def GetBaseNode(self): ''' return self.BaseNode + @property + def AllNodes(self): + return self.UnpackNodes(self.__allNodes) + + def UnpackNodes(self,node_list): + out = [] + for n in node_list: + if isinstance(n,Node): + out.append(n) + elif isinstance(n,dict): + out.extend(self.UnpackNodes(node_list.values())) + return out + def TrackNode(self,node): '''Add a node to track. Will add the node to #AllNodes dictionary with key node.name. @@ -298,10 +321,10 @@ def TrackNode(self,node): Returns: None ''' - if isinstance(node,Node): + if isinstance(node,Node) or isinstance(node,dict): if node.name in self.GetTrackedNodeNames(): print ('WARNING: Attempting to track a node with the same name as one that is already being tracked (%s).'%(node.name)) - self.AllNodes.append(node) + self.__allNodes.append(node) else: raise TypeError('TrackNode() does not support arguments of type %s. Please provide a Node.'%(type(node))) @@ -389,7 +412,7 @@ def Cut(self,name,cuts,node=None,nodetype=None): @param cuts (str, CutGroup): A one-line C++ string that evaluates as a bool or a CutGroup object which contains multiple actions that evaluate as bools. @param node (Node, optional): Node on which to apply the cut/filter. Defaults to #ActiveNode. @param nodetype (str, optional): Defaults to None in which case the new Node will - be type "Define". + be type "Cut". Raises: TypeError: If argument type is not Node. @@ -398,8 +421,21 @@ def Cut(self,name,cuts,node=None,nodetype=None): Node: New #ActiveNode. ''' if node == None: node = self.ActiveNode - newNode = node + + if isinstance(node,Node): + out = self._cutSingle(name,cuts,node,nodetype) + elif isinstance(node,dict): + newNodes = {} + for nkey in node.keys(): + newNodes[nkey] = self.Cut('%s__%s'%(nkey,name),cuts,node[nkey],nodetype) + out = self.SetActiveNode(newNodes) + else: + raise TypeError('Node argument must be of type Node or dict. Found type `%s`.'%type(node)) + + return out + def _cutSingle(self,name,cuts,node,nodetype=None): + newNode = node if isinstance(cuts,CutGroup): for c in cuts.keys(): cut = cuts[c] @@ -434,8 +470,21 @@ def Define(self,name,variables,node=None,nodetype=None): Node: New ActiveNode. ''' if node == None: node = self.ActiveNode - newNode = node + if isinstance(node,Node): + out = self._defineSingle(name,variables,node,nodetype) + elif isinstance(node,dict): + newNodes = {} + for nkey in node.keys(): + newNodes[nkey] = self.Define('%s__%s'%(nkey,name),variables,node[nkey],nodetype) + out = self.SetActiveNode(newNodes) + else: + raise TypeError('Node argument must be of type Node or dict. 
Found type `%s`.'%type(node)) + + return out + + def _defineSingle(self,name,variables,node,nodetype=None): + newNode = node if isinstance(variables,VarGroup): for v in variables.keys(): var = variables[v] @@ -511,6 +560,29 @@ def Discriminate(self,name,discriminator,node=None,passAsActiveNode=None): return newNodes + def SplitOnAlias(self,aliasTuples,node=None): + if node == None: node = self.ActiveNode + newNodes = {} + + checkpoint = node + for t in aliasTuples: + realname = t[0] + alias = t[1] + newNode = checkpoint.Clone(realname, inherit=True) + newNode.AddAlias(realname,alias) + newNodes[realname] = newNode + + return self.SetActiveNode(newNodes) + + def AddAlias(self,name,alias,node=None): + if node == None: node = self.ActiveNode + + if isinstance(node,dict): + for nkey in node.keys(): + node[nkey].AddAlias(name,alias) + else: + node.AddAlias(name,alias) + def SubCollection(self,name,basecoll,condition,skip=[]): '''Creates a collection of a current collection (from a NanoAOD-like format) where the array-type branch is slimmed based on some selection. @@ -1168,13 +1240,13 @@ def PrintNodeTree(self,outfilename,verbose=False,toSkip=[]): this_node_name = node.name this_node_label = node.name if verbose: this_node_label += '\n%s'%textwrap.fill(node.action,50) - graph.add_node(this_node_name, label=this_node_label, type=node.type) for child in node.children: graph.add_edge(this_node_name,child.name) # Contract egdes where we want nodes dropped for skip in toSkip: for node in graph.nodes: + print (graph.nodes[node]) if skip in graph.nodes[node]["type"]: graph = nx.contracted_edge(graph,(list(graph.pred[node].keys())[0],node),self_loops=False) # Write out dot and draw @@ -1248,7 +1320,7 @@ class Node(object): '''Class to represent nodes in the DataFrame processing graph. Can make new nodes via Define, Cut, and Discriminate and setup relations between nodes (done automatically via Define, Cut, Discriminate)''' - def __init__(self, name, DataFrame, action='', nodetype='', children=[], parent=None): + def __init__(self, name, DataFrame, action='', nodetype='', children=[], parent=None, aliases=OrderedDict()): '''Constructor. Holds the RDataFrame and other associated information for tracking in the {@link analyzer}. @@ -1284,6 +1356,10 @@ def __init__(self, name, DataFrame, action='', nodetype='', children=[], parent= # # The "type" of Node. Can be modified but by default will be either # "Define", "Cut", "MergeDefine", "SubCollDefine", or "Correction". + ## @var aliases + # + # Ordered dictionary (collections.OrderedDict) of one-to-one aliases where, if the key + # is found on subsequent actions, it will be replaced by the value in the original action string super(Node, self).__init__() self.DataFrame = DataFrame @@ -1292,6 +1368,10 @@ def __init__(self, name, DataFrame, action='', nodetype='', children=[], parent= self.children = children self.parent = parent self.type = nodetype + self.aliases = aliases + if not isinstance(self.aliases,OrderedDict): + print ('WARNING: Casting input alias dict to OrderedDict. The ordering of the key:value pairs will be random.') + self.aliases = OrderedDict(self.aliases) def Close(self): '''Safely deletes Node instance and all descendants. 
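Because this commit is flagged as initial and not yet working, the following is only a sketch of how SplitOnAlias and the Node alias machinery appear intended to be used. The input file and the calibrated column names are hypothetical stand-ins for what CalibrateVars/AutoJME would produce:

from TIMBER.Analyzer import analyzer

a = analyzer('myNanoAOD.root')   # hypothetical input file

# Hypothetical calibrated columns, assumed to have been created by CalibrateVars beforehand.
# Each tuple is (real column name, alias that later action strings will be written against).
aliasTuples = [
    ('CalibratedFatJet_pt_JES_AK8PFPuppi__up',   'FatJet_pt'),
    ('CalibratedFatJet_pt_JES_AK8PFPuppi__down', 'FatJet_pt')
]

# SplitOnAlias clones the active node once per tuple, registers the alias on each clone,
# and activates a dict of nodes keyed by the real column names.
nodes = a.SplitOnAlias(aliasTuples)

# With a dict active, Cut fans out over every variation; inside each clone ApplyAliases
# rewrites 'FatJet_pt' to that clone's calibrated column before the Filter is booked.
a.Cut('ptCut', 'FatJet_pt[0] > 350')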
@@ -1318,20 +1398,33 @@ def __str__(self): if not a.startswith('__') and not callable(getattr(self, a)): if a == 'children': out += '\t {:15s} = {}\n'.format(a,[c.name for c in getattr(self,a)]) + elif a == 'parent' and self.parent != None: + out += '\t {:15s} = {}\n'.format(a, self.parent.name) else: out += '\t {:15s} = {}\n'.format(a,getattr(self,a)) return out[:-1] - def Clone(self,name=''): + def Clone(self,name='',inherit=False): '''Clones Node instance without child information and with new name if specified. @param name (str, optional): Name for clone. Defaults to current name. + @param inherit (bool, optional): Whether the clone should be a child of the current node. Defaults to False. Returns: Node: Clone of current instance. ''' - if name == '':return Node(self.name,self.DataFrame,children=[],action=self.action) - else: return Node(name,self.DataFrame,children=[],action=self.action) + if name == '': clone_name = self.name + else: clone_name = name + + if inherit: + clone = Node(clone_name, self.DataFrame, children=[], parent=self, action=self.action, nodetype=self.type, aliases=self.aliases) + self.SetChild(clone) + else: + clone = Node(clone_name, self.DataFrame, children=[], action=self.action, nodetype=self.type, aliases=self.aliases) + + print (clone) + + return clone def SetChild(self,child,overwrite=False): '''Set one of child for the node. @@ -1391,9 +1484,10 @@ def Define(self,name,var,nodetype=None,silent=False): Returns: Node: New Node object with new column added. ''' + var = self.ApplyAliases(var) if not silent: print('Defining %s: %s' %(name,var)) newNodeType = 'Define' if nodetype == None else nodetype - newNode = Node(name,self.DataFrame.Define(name,var),children=[],parent=self,action=var,nodetype=newNodeType) + newNode = Node(name,self.DataFrame.Define(name,var),children=[],parent=self,action=var,nodetype=newNodeType,aliases=self.aliases) self.SetChild(newNode) return newNode @@ -1409,9 +1503,10 @@ def Cut(self,name,cut,nodetype=None,silent=False): Returns: Node: New Node object with cut applied. ''' + cut = self.ApplyAliases(cut) if not silent: print('Filtering %s: %s' %(name,cut)) newNodeType = 'Define' if nodetype == None else nodetype - newNode = Node(name,self.DataFrame.Filter(cut,name),children=[],parent=self,action=cut,nodetype=newNodeType) + newNode = Node(name,self.DataFrame.Filter(cut,name),children=[],parent=self,action=cut,nodetype=newNodeType,aliases=self.aliases) self.SetChild(newNode) return newNode @@ -1424,9 +1519,10 @@ def Discriminate(self,name,discriminator): Returns: dict: Dictionary with keys "pass" and "fail" corresponding to the passing and failing Nodes stored as values. 
''' + discriminator = self.ApplyAliases(discriminator) passfail = { - "pass":Node(name+"_pass",self.DataFrame.Filter(discriminator,name+"_pass"),children=[],parent=self,action=discriminator,nodetype='Cut'), - "fail":Node(name+"_fail",self.DataFrame.Filter("!("+discriminator+")",name+"_fail"),children=[],parent=self,action="!("+discriminator+")",nodetype='Cut') + "pass":Node(name+"_pass",self.DataFrame.Filter(discriminator,name+"_pass"),children=[],parent=self,action=discriminator,nodetype='Cut',aliases=self.aliases), + "fail":Node(name+"_fail",self.DataFrame.Filter("!("+discriminator+")",name+"_fail"),children=[],parent=self,action="!("+discriminator+")",nodetype='Cut',aliases=self.aliases) } self.SetChildren(passfail) return passfail @@ -1469,7 +1565,7 @@ def Range(self, *argv): ''' action_name = 'Range(%s)'%(', '.join([str(a) for a in argv])) return Node(self.name+'_range', self.DataFrame.Range(*argv), - action=action_name, nodetype='range', children=[], parent=self) + action=action_name, nodetype='range', children=[], parent=self, aliases=self.aliases) def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE'): # columns can be a list or a regular expression or 'all' '''Takes a snapshot of the RDataFrame corresponding to this Node. @@ -1495,18 +1591,20 @@ def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE') opts.fMode = openOption opts.fCompressionAlgorithm =1 opts.fCompressionLevel = 1 - print("Snapshotting columns: %s"%columns) print("Saving tree %s to file %s"%(treename,outfilename)) if columns == 'all': self.DataFrame.Snapshot(treename,outfilename,'',opts) elif type(columns) == str: + columns = self.ApplyAliases(columns) + print("Snapshotting columns: %s"%columns) self.DataFrame.Snapshot(treename,outfilename,columns,opts) else: column_vec = '' for c in columns: if c == '': continue - column_vec += c+'|' + column_vec += self.ApplyAliases(c)+'|' column_vec = column_vec[:-1] + print("Snapshotting columns: %s"%column_vec) self.DataFrame.Snapshot(treename,outfilename,column_vec,opts) def GetBaseNode(self): @@ -1520,6 +1618,19 @@ def GetBaseNode(self): thisnode = thisnode.parent return thisnode + def AddAlias(self, name, alias): + self.aliases[alias] = name + + def ApplyAliases(self,line,regexMatch=''): + out = line + for alias in self.aliases.keys(): + if alias in line and len(re.findall(regexMatch,line))>0: + print ('\tALIAS: %s -> %s'%(alias,self.aliases[alias])) + for i in range(len(re.findall(alias, line))): + out = re.sub(alias,self.aliases[alias],out) + + return out + ############################## # Group class and subclasses # ############################## diff --git a/TIMBER/CollectionOrganizer.py b/TIMBER/CollectionOrganizer.py index 2e35a2d..46ce0f2 100644 --- a/TIMBER/CollectionOrganizer.py +++ b/TIMBER/CollectionOrganizer.py @@ -26,6 +26,8 @@ def parsetype(self, t): collType = str(t).replace('ROOT::VecOps::RVec<','') if collType.endswith('>'): collType = collType[:-1] + elif collType.endswith('> '): + collType = collType[:-2] collType += '&' if 'Bool_t' in collType: collType = collType.replace('Bool_t&','std::_Bit_reference') @@ -179,7 +181,6 @@ def StructObj(collectionName, varList): return {0}s; ''' attr_assignment_str = '' - print (varList) for i,v in enumerate(varList): varname = v.split(' ')[-1] attr_assignment_str += '{0}_{1}[i],'.format(collectionName, varname) diff --git a/TIMBER/Tools/AutoJME.py b/TIMBER/Tools/AutoJME.py index 0cd88e3..d15b778 100644 --- a/TIMBER/Tools/AutoJME.py +++ b/TIMBER/Tools/AutoJME.py @@ 
-8,7 +8,7 @@ from TIMBER.Tools.Common import GetJMETag from TIMBER.Analyzer import Calibration -def AutoJME(a, jetCollection, year, dataEra=''): +def AutoJME(a, jetCollection, year, dataEra='',setAlias=True): '''Automatic calculation of JES, JER, JMS, and JMR factors and uncertainties per-jet per-event and calibration of \f$p_{T}\f$ and mass with associated variations performed as well. @@ -24,10 +24,15 @@ def AutoJME(a, jetCollection, year, dataEra=''): For data, only recalibrate the jets for the new JECs. + + @param a (analyzer): TIMBER analyzer object which will be manipulated and returned. @param jetCollection (str): FatJet or Jet. @param year (str): 2016, 2017, 2018, 2017UL, or 2018UL. @param dataEra (str, optional): If providing data, include the "era" (A or B or C, etc). Defaults to ''. + @param setAlias (bool, optional): If True, then an alias will be created so that any + further actions (`Cut` or `Define`) with "" will be automatically replaced + by "Calibrated". Defaults to True. Raises: ValueError: Provided jet collection is not "FatJet" or "Jet" @@ -48,14 +53,14 @@ def AutoJME(a, jetCollection, year, dataEra=''): raise ValueError("Jet collection name `%s` not supported. Only FatJet or Jet."%jetCollection) if not a.isData: - jes = Calibration("JES","TIMBER/Framework/include/JES_weight.h", + jes = Calibration("JES_%s"%jetType,"TIMBER/Framework/include/JES_weight.h", [GetJMETag("JES",year,"MC"),jetType,"",True], corrtype="Calibration") - jer = Calibration("JER","TIMBER/Framework/include/JER_weight.h", + jer = Calibration("JER_%s"%jetType,"TIMBER/Framework/include/JER_weight.h", [GetJMETag("JER",year,"MC"),jetType], corrtype="Calibration") if doMass: - jms = Calibration("JMS","TIMBER/Framework/include/JMS_weight.h", + jms = Calibration("JMS_%s"%jetType,"TIMBER/Framework/include/JMS_weight.h", [int(year.replace('UL',''))], corrtype="Calibration") - jmr = Calibration("JMR","TIMBER/Framework/include/JMR_weight.h", + jmr = Calibration("JMR_%s"%jetType,"TIMBER/Framework/include/JMR_weight.h", [int(year.replace('UL',''))], corrtype="Calibration") calibdict = {"%s_pt"%jetCollection:[jes,jer],"%s_mass"%jetCollection:[jes,jer,jms,jmr]} @@ -66,7 +71,7 @@ def AutoJME(a, jetCollection, year, dataEra=''): jmr: {"jets":"%ss"%jetCollection,"genJets":"%ss"%genJetColl} } else: - jes = Calibration("JES","TIMBER/Framework/include/JES_weight.h", + jes = Calibration("JES_%s"%jetType,"TIMBER/Framework/include/JES_weight.h", [GetJMETag("JES",year,dataEraLetter),jetType,"",True], corrtype="Calibration") calibdict = {"%s_pt"%jetCollection:[jes],"%s_mass"%jetCollection:[jes]} @@ -75,5 +80,14 @@ def AutoJME(a, jetCollection, year, dataEra=''): } a.CalibrateVars(calibdict,evalargs,"Calibrated%s"%jetCollection,variationsFlag=(not a.isData)) + if setAlias: + aliases = [] + for v in calibdict.keys(): + calibnames = [j.name for j in calibdict[v]] + for cname in calibnames: + for variation in ['nom','up','down']: + aliases.append((v.replace(jetCollection,"Calibrated%s"%jetCollection)+'_'+cname+'__'+variation,v)) + a.SplitOnAlias(aliases) + # a.AddAlias("Calibrated%s"%jetCollection, '\b'+jetCollection) return a \ No newline at end of file
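Putting the patch set together, the intended end-to-end workflow appears to be the following. This is a hedged sketch, since the last commit is explicitly labeled "Initial work (not working)", and the input file name is a placeholder:

from TIMBER.Analyzer import analyzer
from TIMBER.Tools.AutoJME import AutoJME

a = analyzer('ttbar_2017_MC.root')   # hypothetical ttbar NanoAOD file

# AutoJME books the per-jet-type JES/JER (and JMS/JMR for FatJet) calibrations, builds the
# CalibratedFatJet collection, and, because setAlias defaults to True, calls SplitOnAlias so
# one node exists per nominal/up/down calibrated pt and mass column.
a = AutoJME(a, 'FatJet', '2017')

# Selections are still written against the original branch name; each node's alias redirects
# 'FatJet_pt' to its own calibrated column.
a.Cut('pt_cut', 'FatJet_pt[0] > 350')

# With a dict of nodes active, Snapshot writes one output file per variation,
# appending the node key to the provided file name.
a.Snapshot(['FatJet_pt', 'FatJet_mass'], 'calibrated_snapshot.root', 'Events')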