Skip to content

Commit

Permalink
Merge pull request #16 from lcorcodilos/test_dev
Browse files Browse the repository at this point in the history
Development from creation of HSF DAWG standardized examples
  • Loading branch information
lcorcodilos authored Oct 26, 2020
2 parents 0a985bf + 2493a32 commit 620df99
Show file tree
Hide file tree
Showing 20 changed files with 651 additions and 127 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Setup environment
run: |
sudo apt-get install graphviz libgraphviz-dev
sudo ln -s /usr/lib/x86_64-linux-gnu/libclang-6.0.so.1 /usr/lib/x86_64-linux-gnu/libclang.so
python -m pip install --upgrade pip
pip install virtualenv
python -m virtualenv timber-env
Expand Down
374 changes: 288 additions & 86 deletions TIMBER/Analyzer.py

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions TIMBER/Framework/include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ namespace analyzer {
return v;
}

RVec<ROOT::Math::PtEtaPhiMVector> TLvector(RVec<float> pt,RVec<float> eta,RVec<float> phi,RVec<float> m) {
RVec<ROOT::Math::PtEtaPhiMVector> vs;
for (int i = 0; i < pt.size(); i++) {
ROOT::Math::PtEtaPhiMVector v(pt[i],eta[i],phi[i],m[i]);
vs.push_back(v);
}
return vs;
}

float transverseMass(float MET_pt, float obj_pt, float MET_phi, float obj_phi) {
return sqrt(2.0*MET_pt*obj_pt-(1-cos(deltaPhi(MET_phi,obj_phi))));
}

double invariantMass(ROOT::Math::PtEtaPhiMVector v1, ROOT::Math::PtEtaPhiMVector v2) {
return (v1+v2).M();
}
Expand Down
56 changes: 28 additions & 28 deletions TIMBER/Tools/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
@{
'''

import json, ROOT, os, subprocess
import json, ROOT, os, subprocess, TIMBER
from ROOT import RDataFrame
from TIMBER.Tools.CMS import CMS_lumi, tdrstyle
from contextlib import contextmanager
Expand Down Expand Up @@ -79,7 +79,7 @@ def StitchQCD(QCDdict,normDict=None):
for hkey in QCDdict[k].keys():
QCDdict[k][hkey].Scale(normDict[k])
# Stitch
out = HistGroup("QCD")
out = TIMBER.Analyzer.HistGroup("QCD")
for ksample in QCDdict.keys():
for khist in QCDdict[ksample].keys():
if khist not in out.keys():
Expand Down Expand Up @@ -161,32 +161,32 @@ def AsciiEncodeDict(data):
ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
return dict(map(ascii_encode, pair) for pair in data.items())

def ConcatCols(self,colnames,val='1',connector='&&'):
'''Concatenates a list of column names evaluating to a common `val` (usually 1 or 0)
with some `connector` (boolean logic operator).
@param colnames ([str]): List of column names.
@param val (str): Value to test equality of all columns. Defaults to '1'.
@param connector (str): C++ boolean logic operator between column equality checks. Defaults to '&&'.
Returns:
str: Concatenated string of the entire evaluation that in C++ will return a bool.
'''
concat = ''
for i,c in enumerate(colnames):
if concat == '':
concat = '((%s==%s)'%(c,val)
else:
concat += ' %s (%s==%s)'%(connector,c,val)

if concat != '':
concat += ')'
return concat
def ConcatCols(colnames,val='1',connector='&&'):
'''Concatenates a list of column names evaluating to a common `val` (usually 1 or 0)
with some `connector` (bool logic operator).
@param colnames ([str]): List of column names.
@param val (str): Value to test equality of all columns. Defaults to '1'.
@param connector (str): C++ bool logic operator between column equality checks. Defaults to '&&'.
Returns:
str: Concatenated string of the entire evaluation that in C++ will return a bool.
'''
concat = ''
for c in colnames:
if concat == '':
concat = '((%s==%s)'%(c,val)
else:
concat += ' %s (%s==%s)'%(connector,c,val)

if concat != '':
concat += ')'

return concat

def GetHistBinningTuple(h):
'''Gets the binning information for a histogram and returns it
as a tuple ordered like the arguements to construct a new histogram.
as a tuple ordered like the arguments to construct a new histogram.
Supports TH1, TH2, and TH3.
@param h (TH1): Input histogram from which to get the binning information.
Expand All @@ -195,7 +195,7 @@ def GetHistBinningTuple(h):
TypeError: If histogram does not derive from TH1.
Returns:
tuple(tuple, int): First element of return is the binning and the second element is the dimensionality.
tuple(tuple, int): First element of return is the binning and the second element is the dimension.
'''
# At least 1D (since TH2 and TH3 inherit from TH1)
if isinstance(h,ROOT.TH1):
Expand Down Expand Up @@ -272,7 +272,7 @@ def DictStructureCopy(inDict):
newDict = {}
for k1,v1 in inDict.items():
if type(v1) == dict:
newDict[k1] = dictStructureCopy(v1)
newDict[k1] = DictStructureCopy(v1)
else:
newDict[k1] = 0
return newDict
Expand All @@ -288,7 +288,7 @@ def DictCopy(inDict):
newDict = {}
for k1,v1 in inDict.items():
if type(v1) == dict:
newDict[k1] = dictCopy(v1)
newDict[k1] = DictCopy(v1)
else:
newDict[k1] = v1
return newDict
Expand Down
64 changes: 64 additions & 0 deletions benchmark/ex.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "TIMBER/Framework/include/common.h"
#include "ROOT/RVec.hxx"
#include <Math/Vector4Dfwd.h>
#include <Math/GenVector/LorentzVector.h>
using namespace ROOT::VecOps;

// For ex5.py and ex6.py
// Muon pair mass for all combinations of muons with opposite sign
RVec<float> InvMassOppMuMu (RVec<float> Muon_pt, RVec<float> Muon_eta, RVec<float> Muon_phi, RVec<float> Muon_mass, RVec<float> Muon_charge) {
int nMuon = Muon_pt.size();
int mu0idx, mu1idx;
ROOT::Math::PtEtaPhiMVector mu0LV;
ROOT::Math::PtEtaPhiMVector mu1LV;
RVec<RVec<int>> comboIdxs = Combinations(Muon_pt,2);
RVec<float> invMass;

for (int i = 0; i < comboIdxs[0].size(); i++) {
mu0idx = comboIdxs[0][i];
mu1idx = comboIdxs[1][i];

if (Muon_charge[mu0idx] != Muon_charge[mu1idx]) {
mu0LV.SetCoordinates(Muon_pt[mu0idx], Muon_eta[mu0idx], Muon_phi[mu0idx], Muon_mass[mu0idx]);
mu1LV.SetCoordinates(Muon_pt[mu1idx], Muon_eta[mu1idx], Muon_phi[mu1idx], Muon_mass[mu1idx]);
invMass.push_back((mu0LV+mu1LV).M());
}
}

return invMass;
}

// (Lepton_pt > 10) && (analyzer::DeltaR(Lepton_eta, Jet_eta, Lepton_phi, Jet_phi) < 0.4)
RVec<float> CloseLepVeto (RVec<float> Lepton_pt, RVec<float> Lepton_eta, RVec<float> Lepton_phi, RVec<float> Jet_eta, RVec<float> Jet_phi) {
RVec<bool> jet_bool_vect;
bool bool_pt, bool_deltaR, found_lep;
for (int ijet = 0; ijet < Jet_eta.size(); ijet++) {
found_lep = false;
for (int ilep = 0; ilep < Lepton_pt.size(); ilep++) {
bool_pt = Lepton_pt[ilep] > 10;
bool_deltaR = DeltaR(Lepton_eta[ilep], Jet_eta[ijet], Lepton_phi[ilep], Jet_phi[ijet]) < 0.4;
if (bool_pt && bool_deltaR){
found_lep = true;
}
}
jet_bool_vect.push_back(found_lep);
}
return jet_bool_vect;
}

int NonZlep(RVec<ROOT::Math::PtEtaPhiMVector> Lepton_vect, RVec<int> Lepton_pdgId, RVec<int> Lepton_charge) {
RVec<RVec<int>> combos = Combinations(Lepton_vect,3); // build combinations where first two are the Z
int NonZlep_idx = -1;
float deltaMZ = 1000.; // start at large value for comparison
float dMZ;
bool sameFlavOppSign;
for (int i = 0; i < combos[0].size(); i++) { // loop over combinations
dMZ = abs(91.2-(Lepton_vect[combos[0][i]]+Lepton_vect[combos[1][i]]).M());
sameFlavOppSign = (Lepton_pdgId[combos[0][i]] == -1*Lepton_pdgId[combos[1][i]]);
if ((dMZ < deltaMZ) && sameFlavOppSign) {
deltaMZ = dMZ;
NonZlep_idx = combos[2][i];
}
}
return NonZlep_idx;
}
10 changes: 10 additions & 0 deletions benchmark/ex1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
'''Plotting the Missing ET (or any event level variable).'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
a = analyzer('examples/GluGluToHToTauTau_full.root')
h = a.DataFrame.Histo1D('MET_pt')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
10 changes: 10 additions & 0 deletions benchmark/ex2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
'''Plotting the Jet pT (or any variable that is a per-event array).'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer, HistGroup
a = analyzer('examples/GluGluToHToTauTau_full.root')
h = a.DataFrame.Histo1D('Jet_pt')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
11 changes: 11 additions & 0 deletions benchmark/ex3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
'''Plotting the Jet pT for jets that have a jet pT > 20 GeV and abs(jet eta) < 1.0.'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer, HistGroup
a = analyzer('examples/GluGluToHToTauTau_full.root')
a.Define('JetSel_pt','Jet_pt[abs(Jet_eta) < 2.4]')
h = a.DataFrame.Histo1D('JetSel_pt')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
11 changes: 11 additions & 0 deletions benchmark/ex4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
'''Plotting the Missing ET for jets[sic?] with at least 2 jets with Jet pT > 40 and abs(jet Eta) < 1.0'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
a = analyzer('examples/GluGluToHToTauTau_full.root')
a.Cut('twoHighPtJets','Sum(Jet_pt > 40 && abs(Jet_eta) < 1.0) >= 2')
h = a.DataFrame.Histo1D('MET_pt')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
15 changes: 15 additions & 0 deletions benchmark/ex5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
'''Plot the opposite-sign muon pair mass for all combinations of muons'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
from TIMBER.Tools.Common import CompileCpp
CompileCpp('benchmark/ex.cc')

a = analyzer('examples/GluGluToHToTauTau_full.root')
a.Cut('twoMuons','nMuon>1')
a.Define('invMassMuMu','InvMassOppMuMu(Muon_pt, Muon_eta, Muon_phi, Muon_mass, Muon_charge)')
h = a.DataFrame.Histo1D(('invMassMuMu','Invariant mass of opposite sign muon pairs',100,0,200),'invMassMuMu')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
17 changes: 17 additions & 0 deletions benchmark/ex6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
'''Plot the Missing ET for events that have an opposite-sign muon
pair mass in the range 60-120 GeV (double loop over single collection, math)'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
from TIMBER.Tools.Common import CompileCpp
CompileCpp('benchmark/ex.cc')

a = analyzer('examples/GluGluToHToTauTau_full.root')
a.Cut('twoMuons','nMuon>1')
a.Define('invMassMuMu','InvMassOppMuMu(Muon_pt, Muon_eta, Muon_phi, Muon_mass, Muon_charge)')
a.Cut('invMassMuMu_cut','Any(invMassMuMu > 60 && invMassMuMu < 120)')
h = a.DataFrame.Histo1D('MET_pt')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
18 changes: 18 additions & 0 deletions benchmark/ex7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
'''Plot the sum of the pT of jets with pT > 30 GeV that are not within 0.4 from any lepton with pt > 10 GeV (looping over two collections)'''
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
from TIMBER.Tools.Common import CompileCpp
CompileCpp('benchmark/ex.cc')

a = analyzer('examples/ttbar16_sample.root') # GluGlu sample is incomplete and missing electrons - this is a private stand in that must be replaced to work
a.MergeCollections("Lepton",["Electron","Muon"])
nearLep = 'CloseLepVeto (Lepton_pt, Lepton_eta, Lepton_phi, Jet_eta, Jet_phi)'
a.Define('goodJet_pt','Jet_pt[!(%s)]'%nearLep)
a.Define('goodJet_pt30','goodJet_pt[goodJet_pt > 30]')
a.Define('PtSum','Sum(goodJet_pt30)')
h = a.DataFrame.Histo1D('PtSum')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
24 changes: 24 additions & 0 deletions benchmark/ex8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'''Plot the transverse mass of the Missing ET and a lepton for events with at least
3 leptons and two of the leptons make up a Z (same flavor, opp sign, closest to invar
mass of 91.2 GeV), where the lepton being plotted is the third lepton.'''
# Lucas' Note: There could be four leptons in the event where two satisfy the Z condition
# and then either of the remaining two could be the one used in the transverse mass
# calculation. I do not consider that case (the first that comes up will be picked here).
import time
start = time.time()
#---------------------
from TIMBER.Analyzer import analyzer
from TIMBER.Tools.Common import CompileCpp
CompileCpp('benchmark/ex.cc')

a = analyzer('examples/ttbar16_sample.root') # GluGlu sample is incomplete and missing electrons - this is a private stand in that must be replaced to work
a.MergeCollections("Lepton",["Electron","Muon"])
a.Cut('nLepton2','nLepton>2')
a.Define('Lepton_vect','analyzer::TLvector(Lepton_pt, Lepton_eta, Lepton_phi, Lepton_mass)')
a.Define('NonZlep_idx','NonZlep(Lepton_vect,Lepton_pdgId,Lepton_charge)')
a.Define('MT','NonZlep_idx == -1 ? -1 : analyzer::transverseMass(MET_pt, Lepton_pt[NonZlep_idx], MET_phi, Lepton_phi[NonZlep_idx])')
a.Cut('MT_cut','MT>=0')
h = a.DataFrame.Histo1D('MT')
h.Draw('hist e')
#---------------------
print ('%s secs'%(time.time() - start))
33 changes: 33 additions & 0 deletions benchmark/ex9.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
'''Create a group of plots (jet pT, eta, phi, N_jets). Now make
it for all events, for events with missing et > 20 GeV, and for
events with missing et > 20 GeV and 2 jets with 40 GeV and abs(eta)
< 1.0. Demonstrate making "groups" of plots, and a graphical cut flow'''
import time
start = time.time()
#---------------------
import ROOT
from TIMBER.Analyzer import analyzer

binning_tuples = {
'Jet_pt': ('Jet_pt','Jet p_{T}',100,0,500),
'Jet_eta': ('Jet_eta','Jet #eta',100,-5,5),
'Jet_phi': ('Jet_phi','Jet #phi',100,-3.14,3.14),
'nJet': ('nJet','Number of Jets',10,0,10)
}

a = analyzer('examples/GluGluToHToTauTau_full.root')
raw_hists = a.MakeHistsWithBinning(binning_tuples)
a.Cut('MET_cut','MET_pt>20')
METcut_hists = a.MakeHistsWithBinning(binning_tuples)
a.Cut('Jet_PtEta_cut','Sum(Jet_pt>40 && abs(Jet_eta)<1.0) > 1')
final_hists = a.MakeHistsWithBinning(binning_tuples)

all_hists = raw_hists+METcut_hists+final_hists

out_file = ROOT.TFile.Open('benchmark/ex9_hists.root','RECREATE')
all_hists.Do('Write')
out_file.Close()

a.PrintNodeTree('benchmark/ex9_tree.pdf')
#---------------------
print ('%s secs'%(time.time() - start))
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
include_package_data=True,
# cmdclass={'install': AddToPath},
install_requires = [
"graphviz",
"graphviz==0.14.2",
"pygraphviz==1.5",
"networkx==2.2",
"clang==6.0.0.2"
]
)
Expand Down
6 changes: 6 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ then
return
fi

if ! command -v dot &> /dev/null
then
echo "dot (graphviz) could not be found. Please install it first... (on Ubuntu `sudo apt-get install graphviz libgraphviz-dev`)"
exit
fi

python setup.py install
activate_path=$VIRTUAL_ENV/bin/activate
TIMBERPATH="$PWD/"
Expand Down
8 changes: 8 additions & 0 deletions test/test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"test1": 1,
"test2": {
"float": 1.0,
"str": "string",
"list": []
}
}
Loading

0 comments on commit 620df99

Please sign in to comment.