From 2bb4e502018e76435ba785f490bdfb095d943c31 Mon Sep 17 00:00:00 2001 From: lcorcodilos Date: Mon, 12 Oct 2020 16:19:43 -0400 Subject: [PATCH 01/17] Expand analyzer() constructor docstring --- TIMBER/Analyzer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index ab74c0e..c672d76 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -31,7 +31,11 @@ class analyzer(object): the active node and assigns the output node as the new active node""" def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs"): """ - Constructor + Constructor. Sets up the tracking of actions on an RDataFrame as nodes. Also + looks up and stores common information in NanoAOD such as the number of generated + events in a file (#genEventCount), the LHA ID of the PDF set in the `LHEPdfWeights` + branch (#lhaid), if the file is data (#isData), and if the file is before NanoAOD + version 6 (#preV6). Args: fileName (str): A ROOT file path or the path to a txt file which contains several ROOT file paths separated by @@ -70,7 +74,11 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs"): ## @var genEventCount # int # - # Number of generated events in imported simulation files. Zero if data. + # Number of generated events in imported simulation files. Zero if not found or data. + ## @var lhaid + # int + # + # LHA ID of the PDF weight set in the NanoAOD. -1 if not found or data. 
## @var ActiveNode # Node # From b6b76fef53b828aa4de5aff4d2e76a3d526b95c9 Mon Sep 17 00:00:00 2001 From: lcorcodilos Date: Mon, 12 Oct 2020 16:52:19 -0400 Subject: [PATCH 02/17] Doc for snapshot and add analyzer method --- TIMBER/Analyzer.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index c672d76..90fcf4d 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -156,6 +156,10 @@ def DataFrame(self): ''' return self.ActiveNode.DataFrame + def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE'): + '''@see Node#Snapshot''' + self.ActiveNode.Snapshot(columns,outfilename,treename,lazy,openOption) + def SetActiveNode(self,node): """Sets the active node. @@ -817,6 +821,17 @@ def Apply(self,actiongrouplist): # IMPORTANT: When writing a variable size array through Snapshot, it is required that the column indicating its size is also written out and it appears before the array in the columns list. # columns should be an empty string if you'd like to keep everything def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE'): # columns can be a list or a regular expression or 'all' + '''Takes a snapshot of the RDataFrame corresponding to this Node. + The compression algorithm (1 = ZLIB) and the compression level are both set to 1. + + Args: + columns ([str] or str): List of columns to keep (str) with regex matching. + Provide single string 'all' to include all columns. + outfilename (str): Name of the output file + treename (str): Name of the output TTree + lazy (bool, optional): If False, the RDataFrame actions until this point will be executed here. Defaults to False. + openOption (str, optional): TFile opening options. Defaults to 'RECREATE'. 
+ ''' opts = ROOT.RDF.RSnapshotOptions() opts.fLazy = lazy opts.fMode = openOption From c6a119160d46850c891a9ced2b28dfdae895ab44 Mon Sep 17 00:00:00 2001 From: lcorcodilos Date: Tue, 13 Oct 2020 09:12:34 -0400 Subject: [PATCH 03/17] Clarify 2.7 is EOL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d71e0b..2733705 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ TIMBER (Tree Interface for Making Binned Events with RDataFrame) is an easy-to-u Default arguments assume the use of the NanoAOD format but any ROOT TTree can be processed. ## Quick install -Python 3 is recommended since 2.7 is now no longer supported. Remember to make sure your +Python 3 is recommended since Python 2.7 reached end-of-life on January 1st, 2020. Remember to make sure your ROOT version has been built with Python 3 compatibility. For information on how to do this, see [this explanation](doxysetup/Python3.md). Working in a virtual environment is also recommended. 
Below are the commands for using virtualenv but From 07f6c50272d36250c0928d3bbc2d3c53671a9b92 Mon Sep 17 00:00:00 2001 From: lcorcodilos Date: Mon, 19 Oct 2020 10:33:09 -0400 Subject: [PATCH 04/17] Update Analyzer docs part1 --- TIMBER/Analyzer.py | 402 +++++++++------ docs/annotated.html | 14 +- ...m_b_e_r_1_1_analyzer_1_1_node-members.html | 18 +- ...ass_t_i_m_b_e_r_1_1_analyzer_1_1_node.html | 468 +++++++++++++++++- ..._e_r_1_1_analyzer_1_1analyzer-members.html | 13 +- ..._t_i_m_b_e_r_1_1_analyzer_1_1analyzer.html | 395 ++++++++++----- ...___analyzer_1_1_test_analyzer-members.html | 96 ++++ ...lasstest___analyzer_1_1_test_analyzer.html | 138 ++++++ ...test___common_1_1_common_test-members.html | 89 ++++ docs/classtest___common_1_1_common_test.html | 112 +++++ .../dir_13e138d54eb8818da29c3992edef070a.html | 80 +++ docs/hierarchy.html | 30 +- docs/index.html | 2 +- docs/inherit_graph_0.map | 2 +- docs/inherit_graph_0.md5 | 2 +- docs/inherit_graph_0.png | Bin 1509 -> 1182 bytes docs/inherit_graph_1.map | 2 +- docs/inherit_graph_1.md5 | 2 +- docs/inherit_graph_1.png | Bin 1897 -> 1364 bytes docs/inherit_graph_11.map | 2 +- docs/inherit_graph_11.md5 | 2 +- docs/inherit_graph_11.png | Bin 911 -> 1332 bytes docs/inherit_graph_12.map | 8 +- docs/inherit_graph_12.md5 | 2 +- docs/inherit_graph_12.png | Bin 1542 -> 19535 bytes docs/inherit_graph_2.map | 2 +- docs/inherit_graph_2.md5 | 2 +- docs/inherit_graph_2.png | Bin 2362 -> 1542 bytes docs/inherit_graph_3.map | 2 +- docs/inherit_graph_3.md5 | 2 +- docs/inherit_graph_3.png | Bin 3226 -> 1182 bytes docs/inherit_graph_4.map | 8 +- docs/inherit_graph_4.md5 | 2 +- docs/inherit_graph_4.png | Bin 1315 -> 19535 bytes docs/inherits.html | 40 +- docs/search/all_0.js | 2 +- docs/search/all_1.js | 4 +- docs/search/all_10.js | 1 - docs/search/all_2.js | 6 +- docs/search/all_3.js | 4 +- docs/search/all_4.js | 4 +- docs/search/all_b.js | 1 + docs/search/all_e.js | 1 - docs/search/all_f.js | 5 +- docs/search/classes_1.js | 7 +- 
docs/search/classes_2.js | 4 +- docs/search/classes_3.js | 2 +- docs/search/classes_4.js | 2 +- docs/search/classes_5.js | 2 +- docs/search/classes_6.js | 2 +- docs/search/classes_7.js | 3 +- docs/search/classes_8.js | 2 +- docs/search/functions_0.js | 2 +- docs/search/functions_1.js | 4 +- docs/search/functions_2.js | 4 +- docs/search/functions_3.js | 4 +- docs/search/functions_a.js | 5 +- docs/search/searchdata.js | 4 +- docs/search/variables_5.js | 2 +- docs/search/variables_6.js | 2 +- docs/search/variables_7.js | 2 +- docs/search/variables_8.js | 3 +- doxysetup/Doxyfile | 2 + 63 files changed, 1586 insertions(+), 437 deletions(-) create mode 100644 docs/classtest___analyzer_1_1_test_analyzer-members.html create mode 100644 docs/classtest___analyzer_1_1_test_analyzer.html create mode 100644 docs/classtest___common_1_1_common_test-members.html create mode 100644 docs/classtest___common_1_1_common_test.html create mode 100644 docs/dir_13e138d54eb8818da29c3992edef070a.html diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py index 90fcf4d..8661896 100755 --- a/TIMBER/Analyzer.py +++ b/TIMBER/Analyzer.py @@ -28,7 +28,7 @@ class analyzer(object): where nodes are an RDF instance and an action (or series of actions) can transform the RDF to create a new node(s). When using class functions to perform actions, an active node will always be tracked so that the next action uses - the active node and assigns the output node as the new active node""" + the active node and assigns the output node as the new #ActiveNode""" def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs"): """ Constructor. Setups the tracking of actions on an RDataFrame as nodes. Also @@ -50,27 +50,27 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs"): ## @var BaseDataFrame # ROOT.RDataFrame # - # Initial RDataFrame - no modifications + # Initial RDataFrame - no modifications. 
## @var BaseNode # Node # - # Initial Node - no modifications + # Initial Node - no modifications. ## @var DataFrames # dict # - # All data frames + # All data frames. ## @var Corrections # dict # - # All corrections added to track + # All corrections added to track. ## @var isData # bool # - # Is data (true) or simulation (false) based on existence of _genEventCount branch + # Is data (true) or simulation (false) based on existence of _genEventCount branch. ## @var preV6 # bool # - # Is pre-NanoAODv6 (true) or not (false) based on existence of _genEventCount branch + # Is pre-NanoAODv6 (true) or not (false) based on existence of _genEventCount branch. ## @var genEventCount # int # @@ -152,7 +152,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs"): def DataFrame(self): ''' Returns: - RDataFrame: Dataframe for the active node + RDataFrame: Dataframe for the active node. ''' return self.ActiveNode.DataFrame @@ -161,68 +161,85 @@ def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE') self.ActiveNode.Snapshot(columns,outfilename,treename,lazy,openOption) def SetActiveNode(self,node): - """Sets the active node. + '''Sets the active node. Args: - node: Node to set as #ActiveNode + node (Node): Node to set as #ActiveNode. + + Raises: + ValueError: If argument type is not Node. Returns: - New #ActiveNode - """ + Node: New #ActiveNode. + ''' if not isinstance(node,Node): raise ValueError('SetActiveNode() does not support argument of type %s. Please provide a Node.'%(type(node))) else: self.ActiveNode = node return self.ActiveNode def GetActiveNode(self): - """Get the active node. + '''Get the active node. Returns: - Value of #ActiveNode (Node) - """ + Node: Value of #ActiveNode. + ''' return self.ActiveNode def GetBaseNode(self): - """Get the base node. + '''Get the base node. Returns: - Value of #BaseNode (Node) - """ + Node: Value of #BaseNode. + ''' return self.BaseNode def TrackNode(self,node): - """Add a node to track. 
- - Will add the node to #DataFrames dictionary with key node.name. Will raise ValueError if attempting to overwrite an already tracked Node. + '''Add a node to track. + Will append the node to the list of tracked nodes (`AllNodes`); node names must be unique. Args: - node (Node): Node to start tracking. + node (Node): Node to start tracking. + + Raises: + NameError: If attempting to track nodes of the same name. + TypeError: If argument type is not Node. Returns: None - - """ - + ''' if isinstance(node,Node): if node.name in self.GetTrackedNodeNames(): - raise ValueError('Attempting to track a node with the same name as one that is already being tracked (%s). Please provide a unique node.'%(node.name)) + raise NameError('Attempting to track a node with the same name as one that is already being tracked (%s). Please provide a unique node.'%(node.name)) self.AllNodes.append(node) else: raise TypeError('TrackNode() does not support arguments of type %s. Please provide a Node.'%(type(node))) def GetTrackedNodeNames(self): + ''' + Returns: + [str]: List of names of nodes being tracked. + ''' return [n.name for n in self.AllNodes] def GetCorrectionNames(self): - """Get names of all corrections being tracked. + '''Get names of all corrections being tracked. Returns: - List of Correction keys/names - """ + [str]: List of Correction keys/names. + ''' return self.Corrections.keys() def FilterColumnNames(self,columns,node=None): - '''Takes a list of possible columns and returns only those that exist in the RDataFrame of the supplied node''' + '''Takes a list of possible columns and returns only those that + exist in the RDataFrame of the supplied node. + + Args: + columns ([str]): List of column names (str) + node (Node, optional): Node to compare against. Defaults to #BaseNode. + + Returns: + [str]: List of the provided column names that also exist in the RDataFrame (the intersection). 
+ ''' if node == None: node = self.BaseNode cols_in_node = node.DataFrame.GetColumnNames() out = [] @@ -236,8 +253,13 @@ def ConcatCols(self,colnames,val='1',connector='&&'): '''Concatenates a list of column names evaluating to a common `val` (usually 1 or 0) with some `connector` (boolean logic operator). + Args: + colnames ([str]): List of column names (str). + val (str, optional): Value to test equality of all columns. Defaults to '1'. + connector (str, optional): C++ boolean logic operator between column equality checks. Defaults to '&&'. + Returns: - str: List concatenated as a string with the assigned evaluations (`val`) and `connector` + str: Concatenated string of the entire evaluation that in C++ will return a bool. ''' concat = '' for i,c in enumerate(colnames): @@ -252,8 +274,8 @@ def ConcatCols(self,colnames,val='1',connector='&&'): return concat def GetTriggerString(self,trigList): - '''Checks input list for missing triggers and drops those missing (FilterColumnNames) - and then concatenates those remaining into an OR string (ConcatCols) + '''Checks input list for missing triggers and drops those missing (#FilterColumnNames) + and then concatenates those remaining into an OR (`||`) string (#ConcatCols) Args: trigList [str]: List of trigger names @@ -267,8 +289,8 @@ def GetTriggerString(self,trigList): return trig_string def GetFlagString(self,flagList): - '''Checks input list for missing flags and drops those missing (FilterColumnNames) - and then concatenates those remaining into an AND string (ConcatCols) + '''Checks input list for missing flags and drops those missing (#FilterColumnNames) + and then concatenates those remaining into an AND string (#ConcatCols) Args: flagList [str]: List of flag names @@ -282,11 +304,11 @@ def GetFlagString(self,flagList): return flag_string def GetFileName(self): - """Get input file name. + '''Get input file name. 
Returns: - File name - """ + str: File name + ''' return self.__fileName #------------------------------------------------------------# @@ -294,20 +316,21 @@ def GetFileName(self): # benefit of class keeping track of an Active Node (reset by # # each action and used by default). # #------------------------------------------------------------# - def Cut(self,name='',cuts='',node=None): - """Apply a cut/filter to a provided node or the #ActiveNode by default. - + def Cut(self,name,cuts,node=None): + '''Apply a cut/filter to a provided node or the #ActiveNode by default. Will add the resulting node to tracking and set it as the #ActiveNode. Args: name (str): Name for the cut for internal tracking and later reference. - cuts (str,CutGroup): A one-line C++ string that evaluates as a boolean or a CutGroup object which contains multiple C++ strings that evaluate as booleans. - node (Node): Node to apply the cut/filter. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + cuts (str,#CutGroup): A one-line C++ string that evaluates as a boolean or a CutGroup object which contains multiple actions that evaluate as booleans. + node (Node, optional): Node on which to apply the cut/filter. Defaults to #ActiveNode. - Returns: - New active Node. + Raises: + TypeError: If argument type is not Node. - """ + Returns: + Node: New #ActiveNode. 
+ ''' if node == None: node = self.ActiveNode newNode = node @@ -317,31 +340,30 @@ def Cut(self,name='',cuts='',node=None): newNode = newNode.Cut(c,cut) newNode.name = cuts.name+'__'+c self.TrackNode(newNode) - # newNode.name = cuts.name elif isinstance(cuts,str): newNode = newNode.Cut(name,cuts) self.TrackNode(newNode) else: raise TypeError("Second argument to Cut method must be a string of a single cut or of type CutGroup (which provides an OrderedDict).") - # self.TrackNode(newNode) return self.SetActiveNode(newNode) - def Define(self,name='',variables='',node=None): - """Defines a variable/column on top of a provided node or the #ActiveNode by default. - - Will add the resulting node to tracking and set it as the #ActiveNode + def Define(self,name,variables,node=None): + '''Defines a variable/column on top of a provided node or the #ActiveNode by default. + Will add the resulting node to tracking and set it as the #ActiveNode. Args: - name (str): Name for the column for internal tracking and later reference. - cuts (str,CutGroup): A one-line C++ string that evaluates to desired value to store or a VarGroup object which contains multiple C++ strings that evaluate to the desired value(s). - node (Node): Node to create the new variable/column on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + cuts (str,#VarGroup): A one-line C++ string that evaluates to desired value to store + or a #VarGroup object which contains multiple actions that evaluate to the desired values. + node (Node, optional): Node to create the new variable/column on top of. Defaults to #ActiveNode. - Returns: - New active Node. + Raises: + TypeError: If argument type is not Node. - """ + Returns: + Node: New #ActiveNode. 
+ ''' if node == None: node = self.ActiveNode newNode = node @@ -363,16 +385,19 @@ def Define(self,name='',variables='',node=None): # Applies a bunch of action groups (cut or var) in one-shot in the order they are given def Apply(self,actionGroupList,node=None,trackEach=True): - """Applies a single CutGroup/VarGroup or an ordered list of Groups to the provided node or the #ActiveNode by default. + '''Applies a single CutGroup/VarGroup or an ordered list of Groups to the provided node or the #ActiveNode by default. Args: - actionGroupList (Group, list(Group)): The CutGroup or VarGroup to act on node or a list of CutGroups or VarGroups to act (in order) on node. - node (Node): Node to create the new variable/column on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + node ([type], optional): Node to create the new variable/column on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + trackEach (bool, optional): [description]. Defaults to True. + + Raises: + TypeError: If argument type is not Node. Returns: - New active Node. - """ + Node: New #ActiveNode. + ''' if node == None: node = self.ActiveNode newNode = node @@ -392,19 +417,18 @@ def Apply(self,actionGroupList,node=None,trackEach=True): return self.SetActiveNode(newNode) def Discriminate(self,name,discriminator,node=None,passAsActiveNode=None): - """Forks a node based upon a discriminator being True or False (#ActiveNode by default). + '''Forks a node based upon a discriminator being True or False (#ActiveNode by default). Args: name (str): Name for the discrimination for internal tracking and later reference. discriminator (str): A one-line C++ string that evaluates as a boolean to discriminate for the forking of the node. - node (Node): Node to discriminate. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. - passAsActiveNode (bool): True if the #ActiveNode should be set to the node that passes the discriminator. 
+ node (Node, optional): Node to discriminate. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + passAsActiveNode (bool, optional): True if the #ActiveNode should be set to the node that passes the discriminator. False if the #ActiveNode should be set to the node that fails the discriminator. Defaults to None in which case the #ActiveNode does not change. Returns: - Dictionary with keys "pass" and "fail" corresponding to the passing and failing Nodes stored as values. - - """ + dict: Dictionary with keys "pass" and "fail" corresponding to the passing and failing Nodes stored as values. + ''' if node == None: node = self.ActiveNode newNodes = node.Discriminate(name,discriminator) @@ -422,18 +446,22 @@ def Discriminate(self,name,discriminator,node=None,passAsActiveNode=None): #---------------------# # Want to correct with analyzer class so we can track what corrections have been made for final weights and if we want to save them out in a group when snapshotting def AddCorrection(self,correction,evalArgs=[],node=None): - """Add a Correction to track. + '''Add a Correction to track. Sets new active node with all correction + variations calculated as new columns. Args: correction (Correction): Correction object to add. - evalArgs ([str]): List of arguments (NanoAOD branch names) to provide to per-event evaluation method. + evalArgs ([str], optional): List of arguments (NanoAOD branch names) to provide to per-event evaluation method. Default empty and clang will deduce if method definition argument names match columns in RDataFrame. - node (Node): Node to add correction on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + node (Node, optional): Node to add correction on top of. Defaults to #ActiveNode. - Returns: - New active Node. + Raises: + TypeError: If argument types are not Node and Correction. + ValueError: If Correction type is not a weight or uncertainty. - """ + Returns: + Node: New #ActiveNode. 
+ ''' if node == None: node = self.ActiveNode # Quick type checking @@ -461,17 +489,17 @@ def AddCorrection(self,correction,evalArgs=[],node=None): # self.TrackNode(returnNode) return self.SetActiveNode(newNode) - def AddCorrections(self,correctionList=[],node=None): - """Add multiple Corrections to track. + def AddCorrections(self,correctionList,node=None): + '''Add multiple Corrections to track. Sets new #ActiveNode with all correction + variations calculated as new columns. Args: - correctionList (list(Correction)): List of Correction objects to add. - node (Node): Node to add corrections on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + correctionList ([Correction]): List of Correction objects to add. + node (Node, optional): [description]. Defaults to None. Returns: - New active Node. - - """ + Node: New #ActiveNode. + ''' if node == None: node = self.ActiveNode newNode = node @@ -481,6 +509,18 @@ def AddCorrections(self,correctionList=[],node=None): return self.SetActiveNode(newNode) def __checkCorrections(self,correctionNames,dropList): + '''Does type checking and drops specified corrections by name. + + Args: + correctionNames ([str]): List of correction names to include. + dropList ([type]): List of correction names to drop. + + Raises: + ValueError: If lists aren't provided. + + Returns: + [str]: List of remaining correction names. + ''' # Quick type checking if correctionNames == None: correctionsToApply = self.Corrections.keys() elif not isinstance(correctionNames,list): @@ -499,7 +539,7 @@ def __checkCorrections(self,correctionNames,dropList): return correctionsToApply def MakeWeightCols(self,node=None,correctionNames=None,dropList=[]): - """Makes columns/variables to store total weights based on the Corrections that have been added. + '''Makes columns/variables to store total weights based on the Corrections that have been added. 
This function automates the calculation of the columns that store the nominal weight and the variation of weights based on the corrections in consideration. The nominal weight will be the product @@ -520,9 +560,8 @@ def MakeWeightCols(self,node=None,correctionNames=None,dropList=[]): are dropped from consideration. Returns: - New active Node. - - """ + Node: New #ActiveNode. + ''' if node == None: node = self.ActiveNode correctionsToApply = self.__checkCorrections(correctionNames,dropList) @@ -556,17 +595,16 @@ def MakeWeightCols(self,node=None,correctionNames=None,dropList=[]): return self.SetActiveNode(returnNode) def MakeTemplateHistos(self,templateHist,variables,node=None): - """Generates the uncertainty template histograms based on the weights created by MakeWeightCols(). + '''Generates the uncertainty template histograms based on the weights created by #MakeWeightCols(). Args: - templateHist (TH1,TH2,TH3): An TH1, TH2, or TH3 used as a template to create the histograms. - variables (list(str)): A list of the columns/variables to plot (["x","y","z"]). - node (Node): Node to plot histograms from. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + templateHist (TH1,TH2,TH3): A TH1, TH2, or TH3 used as a template to create the histograms. + variables ([str]): A list of the columns/variables to plot (ex. ["x","y","z"]). + node (Node): Node to plot histograms from. Defaults to #ActiveNode. Returns: - HistGroup object which stores the uncertainty template histograms. - - """ + HistGroup: Uncertainty template histograms. 
+ ''' if node == None: node = self.ActiveNode out = HistGroup('Templates') @@ -606,19 +644,18 @@ def MakeTemplateHistos(self,templateHist,variables,node=None): # Draw templates together to see up/down effects against nominal # #----------------------------------------------------------------# def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),fileType='pdf'): - """Draw the template uncertainty histograms created by MakeTemplateHistos(). + '''Draw the template uncertainty histograms created by #MakeTemplateHistos(). Args: - hGroup (HistGroup): A HistGroup object storing the uncertainty template histograms. + hGroup (HistGroup): Uncertainty template histograms. saveLocation (str): Path to folder to save histograms. - projection (str): "X" (Default), "Y", or "Z". Axis to project onto if templates are not 1D. - projectionArgs (tuple): A tuple of arguments provided to ROOT TH1 ProjectionX(Y)(Z). - fileType (str): File type - "pdf", "png", etc (must be supported by TCanvas.Print()). + projection (str, optional): "X" (Default), "Y", or "Z". Axis to project onto if templates are not 1D. + projectionArgs (tuple, optional): A tuple of arguments provided to ROOT TH1 ProjectionX(Y)(Z). + fileType (str, optional): File type - "pdf", "png", etc (must be supported by TCanvas.Print()). Returns: None - - """ + ''' ROOT.gStyle.SetOptStat(0) canvas = ROOT.TCanvas('c','',800,700) @@ -673,17 +710,19 @@ def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),file # cut that gets dropped # #----------------------------------------------# def Nminus1(self,node,cutgroup): - """Print a PDF image of the node structure of the analysis. - Requires python graphviz package (`pip install graphviz`) + '''Create an N-1 tree structure of nodes building off of `node` + with the N cuts from `cutgroup`. + + The structure is optimized so that as many actions are shared as possible + so that the N different nodes can be made. Use #PrintNodeTree() to visualize. 
Args: - cutgroup (CutGroup): CutGroup that you'd like to scan. + node (Node): Node to build on. + cutgroup (CutGroup): Group of N cuts to apply. Returns: - Dictionary with the final nodes - - """ - + dict: N nodes in dictionary with keys indicating the cut that was not applied. + ''' # Initialize print ('Performing N-1 scan for CutGroup %s'%cutgroup.name) @@ -712,15 +751,16 @@ def Nminus1(self,node,cutgroup): return nminusones def PrintNodeTree(self,outfilename,verbose=False): - """Print a PDF image of the node structure of the analysis. Requires python graphviz package (`pip install graphviz`) + '''Print a PDF image of the node structure of the analysis. + Requires python graphviz package which should be an installed dependency. Args: outfilename (str): Name of output PDF file. + verbose (bool, optional): Turns on verbose node labels. Defaults to False. Returns: None - - """ + ''' from graphviz import Digraph dot = Digraph(comment='Node processing tree') for node in self.AllNodes: @@ -738,69 +778,149 @@ def PrintNodeTree(self,outfilename,verbose=False): # Node Class # ############## class Node(object): - """Class to represent nodes in the DataFrame processing graph. Can make new nodes via Define, Cut, and Discriminate and setup relations between nodes (done automatically via Define, Cut, Discriminate)""" - def __init__(self, name, DataFrame, parent=None, children=[],action=''): + '''Class to represent nodes in the DataFrame processing graph. + Can make new nodes via Define, Cut, and Discriminate and setup + relations between nodes (done automatically via Define, Cut, Discriminate)''' + def __init__(self, name, DataFrame, action='', children=[]): + '''Constructor. Holds the RDataFrame and other associated information + for tracking in the #analyzer(). + + Methods which act on the RDataFrame always return a new node + since RDataFrame is not modified in place. + + Args: + name (str): Name for the node. 
Duplicate named nodes cannot be tracked simultaneously in the analyzer. + DataFrame (RDataFrame): Dataframe to track. + children ([Node], optional): Child nodes if they exist. Defaults to []. + action (str, optional): Action performed (the C++ line). Default is '' but should only be used for a base RDataFrame. + ''' super(Node, self).__init__() self.DataFrame = DataFrame self.name = name self.action = action - # self.parent = parent # None or specified - self.children = children # list of length 0, 1, or 2 + self.children = children def Clone(self,name=''): - if name == '':return Node(self.name,self.DataFrame,parent=[],children=[],action=self.action) - else: return Node(name,self.DataFrame,parent=[],children=[],action=self.action) + '''Clones Node instance without child information and with new name if specified. + + Args: + name (str, optional): Name for clone. Defaults to current name. + + Returns: + Node: Clone of current instance. + ''' + if name == '':return Node(self.name,self.DataFrame,children=[],action=self.action) + else: return Node(name,self.DataFrame,children=[],action=self.action) - # Set parent of type Node - # def SetParent(self,parent): - # if isinstance(parent,Node): self.parent = parent - # else: raise TypeError('Parent is not an instance of Node class for node %s'%self.name) + def SetChild(self,child,overwrite=False): + '''Set one of child for the node. - # Set one child of type Node - def SetChild(self,child,overwrite=False,silence=False): + Args: + child (Node): Child node to add. + overwrite (bool, optional): Overwrites all current children stored. Defaults to False. + + Raises: + TypeError: If argument type is not Node. + ''' if overwrite: self.children = [] - # if len(children > 1): raise ValueError("More than two children are trying to be added node %s. 
You may use the overwrite option to erase current children or find your bug."%self.name) - # if len(children == 1) and silence == False: raw_input('WARNING: One child is already specified for node %s and you are attempting to add another (max 2). Press enter to confirm and continue.'%self.name) - if isinstance(child,Node): self.children.append(child) - else: raise TypeError('Child is not an instance of Node class for node %s' %self.name) + if isinstance(child,Node): + if child.name not in [c.name for c in self.children]: + self.children.append(child) + else: + raise NameError('Attempting to add child node "%s" but one with this name already exists in node "%s".'%(child.name, self.name)) + else: + raise TypeError('Child is not an instance of Node class for node %s' %self.name) - # Set children of type Node def SetChildren(self,children,overwrite=False): + '''Set multiple children for the node. + + Args: + children ([Node], {str:Node}): List of children or dictionary of children. + overwrite (bool, optional): Overwrites all current children stored. Defaults to False. + + Raises: + TypeError: If argument type is not dict or list of Node. + ''' if overwrite: self.children = [] - # if len(children > 0): raise ValueError("More than two children are trying to be added node %s. 
You may use the overwrite option to erase current children or find your bug."%self.name) - if isinstance(children,dict) and 'pass' in children.keys() and 'fail' in children.keys() and len(children.keys()) == 2: - self.SetChild(children['pass']) - self.SetChild(children['fail']) + if isinstance(children,dict): + for c in children.keys(): + if isinstance(child,Node): + self.SetChild(children[c]) + else: + raise TypeError('Child is not an instance of Node class for node %s' %self.name) + + elif isinstance(children,list): + for c in children: + if isinstance(child,node): + self.SetChild(c) + else: + raise TypeError('Child is not an instance of Node class for node %s' %self.name) else: - raise TypeError('Attempting to add a dictionary of children of incorrect format. Argument must be a dict of format {"pass":class.Node,"fail":class.Node}') + raise TypeError('Attempting to add chidren that are not in a list or dict.') - # Define a new column to calculate def Define(self,name,var): + '''Produces a new Node with the provided variable/column added. + + Args: + name (str): Name for the column for internal tracking and later reference. + cuts (str): A one-line C++ string that evaluates to desired value to store. + + Returns: + Node: New Node object with new column added. + ''' print('Defining %s: %s' %(name,var)) - newNode = Node(name,self.DataFrame.Define(name,var),parent=self,children=[],action=var) + newNode = Node(name,self.DataFrame.Define(name,var),children=[],action=var) self.SetChild(newNode) return newNode - # Define a new cut to make def Cut(self,name,cut): + '''Produces a new Node with the provided cut/filter applied. + + Args: + name (str): Name for the cut for internal tracking and later reference. + cuts (str): A one-line C++ string that evaluates as a boolean. + + Returns: + Node: New #ActiveNode. 
+ ''' print('Filtering %s: %s' %(name,cut)) - newNode = Node(name,self.DataFrame.Filter(cut,name),parent=self,children=[],action=cut) + newNode = Node(name,self.DataFrame.Filter(cut,name),children=[],action=cut) self.SetChild(newNode) return newNode - # Discriminate based on a discriminator def Discriminate(self,name,discriminator): + '''Produces a dictionary with two new Nodes made by forking the current node based upon a discriminator being True or False. + + Args: + name (str): Name for the discrimination for internal tracking and later reference. + discriminator (str): A one-line C++ string that evaluates as a boolean to discriminate on. + + Returns: + dict: Dictionary with keys "pass" and "fail" corresponding to the passing and failing Nodes stored as values. + ''' passfail = { - "pass":Node(name+"_pass",self.DataFrame.Filter(discriminator,name+"_pass"),parent=self,children=[],action=discriminator), - "fail":Node(name+"_fail",self.DataFrame.Filter("!("+discriminator+")",name+"_fail"),parent=self,children=[],action="!("+discriminator+")") + "pass":Node(name+"_pass",self.DataFrame.Filter(discriminator,name+"_pass"),children=[],action=discriminator), + "fail":Node(name+"_fail",self.DataFrame.Filter("!("+discriminator+")",name+"_fail"),children=[],action="!("+discriminator+")") } self.SetChildren(passfail) return passfail - # Applies a bunch of action groups (cut or var) in one-shot in the order they are given def Apply(self,actiongrouplist): + '''Applies a single CutGroup/VarGroup or an ordered list of Groups to the provided node or the #ActiveNode by default. + + Args: + actionGroupList (Group, list(Group)): The CutGroup or VarGroup to act on node or a list of CutGroups or VarGroups to act (in order) on node. + node ([type], optional): Node to create the new variable/column on top of. Must be of type Node (not RDataFrame). Defaults to #ActiveNode. + trackEach (bool, optional): [description]. Defaults to True. 
+ + Raises: + TypeError: If argument type is not Node. + + Returns: + Node: New #ActiveNode. + ''' if type(actiongrouplist) != list: actiongrouplist = [actiongrouplist] node = self for ag in actiongrouplist: @@ -844,12 +964,10 @@ def Snapshot(self,columns,outfilename,treename,lazy=False,openOption='RECREATE') elif type(columns) == str: self.DataFrame.Snapshot(treename,outfilename,columns,opts) else: - # column_vec = ROOT.std.vector('string')() column_vec = '' for c in columns: column_vec += c+'|' column_vec = column_vec[:-1] - # column_vec.push_back(c) self.DataFrame.Snapshot(treename,outfilename,column_vec,opts) ############################## diff --git a/docs/annotated.html b/docs/annotated.html index 5a3e0e5..ba950dc 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -76,16 +76,10 @@  CHistGroupStores histograms with dedicated function to use TH1/2/3 methods in a batch  CNodeClass to represent nodes in the DataFrame processing graph  CVarGroupStores Define actions - CBTagCalibration - CBTagCalibrationReader - CBTagCalibrationReaderImpl - CTmpEntry - CBTagEntry - CParameters - CCollection - CJetRecalibrator - CPDFweight_uncert - CSJBtag_SF + CCollection + CJetRecalibrator + CPDFweight_uncert + CSJBtag_SF diff --git a/docs/class_t_i_m_b_e_r_1_1_analyzer_1_1_node-members.html b/docs/class_t_i_m_b_e_r_1_1_analyzer_1_1_node-members.html index 4babcaf..c5a9582 100644 --- a/docs/class_t_i_m_b_e_r_1_1_analyzer_1_1_node-members.html +++ b/docs/class_t_i_m_b_e_r_1_1_analyzer_1_1_node-members.html @@ -72,19 +72,19 @@

This is the complete list of members for TIMBER.Analyzer.Node, including all inherited members.

- + - + - - + + - - + + - - - + + +
__init__(self, name, DataFrame, parent=None, children=[], action='') (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
__init__(self, name, DataFrame, action='', children=[])TIMBER.Analyzer.Node
action (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Apply(self, actiongrouplist) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Apply(self, actiongrouplist)TIMBER.Analyzer.Node
children (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Clone(self, name='') (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Cut(self, name, cut) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Clone(self, name='')TIMBER.Analyzer.Node
Cut(self, name, cut)TIMBER.Analyzer.Node
DataFrame (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Define(self, name, var) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Discriminate(self, name, discriminator) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Define(self, name, var)TIMBER.Analyzer.Node
Discriminate(self, name, discriminator)TIMBER.Analyzer.Node
name (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
SetChild(self, child, overwrite=False, silence=False) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
SetChildren(self, children, overwrite=False) (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
Snapshot(self, columns, outfilename, treename, lazy=False, openOption='RECREATE') (defined in TIMBER.Analyzer.Node)TIMBER.Analyzer.Node
SetChild(self, child, overwrite=False)TIMBER.Analyzer.Node
SetChildren(self, children, overwrite=False)TIMBER.Analyzer.Node
Snapshot(self, columns, outfilename, treename, lazy=False, openOption='RECREATE')TIMBER.Analyzer.Node