Merge pull request #34 from ResonantHbbHgg/play-the-toy

Scripts for toy limits study
ResonantHbbHgg · Apr 4, 2018 · 9ba0e18 · 9ba0e18
2 parents 0e8a150 + c7075c2
commit 9ba0e18
Show file tree

Hide file tree

Showing 21 changed files with 560 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -169,10 +169,6 @@ Here, `-s` option can be used if only limits for `kt>0` are produced. In this ca
 plot is simply drawn symmetrically over (0,0) point in (kl,kt) coordinates.
 
 
-PS. In order to reproduce the _EPS17_ results, follow the instructions here:
-[SmartScripts/README.md](SmartScripts/README.md)
-
-
 
 ### Working examples for Resonant case
 
@@ -184,14 +180,14 @@ For low masses:
 ```shell
 for s in Radion BulkGraviton;
   do echo ${s};  
-  for m in 250 260 270 280 300 320 350 400 450 500 550 600;  
+  for m in 250 260 270 280 300 320 340 350 400 450 500 550 600;  
 	do echo ${s} ${m};  
 	python scripts/makeAllTrees.py -x res -d /eos/cms/store/group/phys_higgs/resonant_HH/RunII/FlatTrees/2016/Mar82018_ForPubli_RafStyle/Data/Hadd/ -s /eos/cms/store/group/phys_higgs/resonant_HH/RunII/FlatTrees/2016/Mar82018_ForPubli_RafStyle/Signal/Hadd/ -f LT_ResOutDirOne_ --doCatMVA --MVAHMC0 0.960 --MVAHMC1 0.700 --MVALMC0 0.960 --MVALMC1 0.700 --massNR 500 --LMLJBTC 0.0 --LMSJBTC 0.0 --resMass ${m} --resType ${s};  
 	done;  
   done
 ```
 
-For high masses (some nambers are different in the parameters wrt low masses):
+For high masses (some numbers are different in the parameters wrt low masses):
 ```shell
 for s in Radion BulkGraviton;  
   do echo ${s};  
@@ -202,7 +198,7 @@ for s in Radion BulkGraviton;
   done
 ```
 
-To produce the limit trees, modify the input paths in `conf_resonant_LM.json` and
+To produce the limits, modify the input paths in `conf_resonant_LM.json` and
 `conf_resonant_HM.json` files and then run locally (it's rather quick compared to the
 non-resonant limits):
 

diff --git a/conf_default.json b/conf_default.json
@@ -1,6 +1,6 @@
 
 {
-    "LTDIR" : "/store/group/phys_higgs/resonant_HH/RunII/LimitTrees/FixMX/NewHope_TYPE",
+    "LTDIR" : "/store/group/phys_higgs/resonant_HH/RunII/LimitTrees/FixPhoCor/LT_PhotonCorrFix_TYPE",
     "signal" : {
 	"types" : ["HighMass", "LowMass"],
 	"signalModelCard" : "/src/HiggsAnalysis/bbggLimits/Models/models_2D_higgs_mjj70.rs"

diff --git a/conf_resonant_HM.json b/conf_resonant_HM.json
@@ -1,6 +1,6 @@
 
 {
-    "LTDIR" : "/afs/cern.ch/user/a/andrey/work/hh/LimitCode/CMSSW_7_4_7/src/HiggsAnalysis/bbggLimits/LT_RES_Mar21_HighMass_TYPE",
+    "LTDIR" : "/store/group/phys_higgs/resonant_HH/RunII/LimitTrees/FixPhoCor_res/LT_RES_Mar21_HighMass_TYPE",
     "signal" : {
 	"types" : ["Radion", "BulkGraviton"],
 	"signalModelCard" : "/src/HiggsAnalysis/bbggLimits/Models/models_2D_higgs_mjj70.rs"

diff --git a/conf_resonant_LM.json b/conf_resonant_LM.json
@@ -1,6 +1,6 @@
 
 {
-    "LTDIR" : "/afs/cern.ch/user/a/andrey/work/hh/LimitCode/CMSSW_7_4_7/src/HiggsAnalysis/bbggLimits/LT_RES_Mar21_LowMass_TYPE",
+    "LTDIR" : "/store/group/phys_higgs/resonant_HH/RunII/LimitTrees/FixPhoCor_res/LT_RES_Mar21_LowMass_TYPE",
     "signal" : {
 	"types" : ["Radion", "BulkGraviton"],
 	"signalModelCard" : "/src/HiggsAnalysis/bbggLimits/Models/models_2D_higgs_mjj70.rs"

diff --git a/python/LimitsUtil.py b/python/LimitsUtil.py
@@ -358,7 +358,7 @@ def runFullChain(opt, Params, point=None, NRgridPoint=-1, extraLabel=''):
     for t in signalTypes:
       strReplace = baseFolder+'/'+t+Label+'/datacards/'+os.getenv("CMSSW_BASE")+'/src/HiggsAnalysis/bbggLimits/'
       os.system("sed -i 's|"+strReplace+"||g' "+combCard)
-      print "String to replace:", strReplace
+      procLog.info("String to replace: "+ strReplace)
 
     if opt.analyticalRW:
       # Make another datacard, with entries for kt-reweihgting of single Higgs background

diff --git a/toyStudy/README.md b/toyStudy/README.md
@@ -0,0 +1,114 @@
+## Read the workspaces 
+
+A script called [readWS.py](readWS.py) allows you to read the workspaces from the signal and
+background of the actual analysis (which are also commited here).
+
+Here is a plot of the data distribution and backround fit in mgg in the most sensitive
+category (excluding single Higgs): 
+
+![Bkg fit in mgg](figs/fig_bkg_mgg.png)
+
+Total number of data events in this range is 118, signal prediction is 4.27 (normalized to 1 fb cross section).  
+
+
+## How to play the toys
+
+### Cut and count
+
+A template card for this game is [card_cut_n_count.txt](card_cut_n_count.txt), in which
+the strings `%SIG%` and `%BKG%` will be replaced in a script.  
+The corresponding script is [toyLimit_cut_n_count.py](toyLimit_cut_n_count.py). Just run it:
+
+``` 
+python toyLimit_cut_n_count.py 
+```
+
+You will get a plot like this:  
+![Cut-n-count results](figs/fig_cut_n_count.png)
+
+As we can see the relationship between the limit and &Sqrt;(N_bkg) is indeed
+linear. However, we also notice that the line does not cross zero. That means that there
+is an offset in the scaling of the limits. We are at the level of small number of events
+in the background.  From the backgound `mgg` shape `dN/dmgg = 2 events/GeV`. If we
+consider two cases of widths, 2 GeV and 3.2 GeV, we get 4 and 6.4 events
+correspondingly. So let's take those numbers and look at how the limits scale:
+
+| N_sig | Limit with N_bkg=4 events | Limit with N_Bkg=6.4 events| (L(6.4)-L(4))/L(4) | compare to &Sqrt;6.4/&Sqrt;4 - 1 |
+| - | - | - | - | - |
+| 2 | 2.71 | 3.23 | 0.193 | 0.265 |
+| 4 | 1.36 | 1.62 | 0.193 | 0.265 |
+| 6 | 0.90 | 1.08 | 0.199 | 0.265 |
+
+That is, already with the counting experiment the limit change is smaller than 26%.
+
+### Play with shapes 
+
+Template card for this game is [card_shape_n_roll.txt](card_shape_n_roll.txt). Here we
+will be replacing `%SIG%`, `%BKG%` and `%SIGMA%` within
+[toyLimit_shape_n_roll.py](toyLimit_shape_n_roll.py) script. In the analysis, Nsig = 4
+events, N_bkg = 118 events (total number in 100 &lt; mgg &lt; 180 GeV), sigma = 1.0 (with
+a bug) or 1.6 GeV (corrected). Signal shape is Gaussian with mean at 125 GeV and variable
+&sigma;, while the background is a simple Exponential function.
+
+
+The toys generated for that game as well as the background fit used are shown here:
+![Toy data](figs/fig_gen_bkg.png)
+
+``` 
+python toyLimit_shape_n_roll.py 
+```
+
+After running the script, you will get a plot like this, which shows how the limits scale with the width of the gaussian:  
+![Shape results](figs/fig_scale_with_sigma.png)
+
+Indeed, the scaling is as &Sqrt;&sigma;, nevertheless the ratio at 1.6 to 1.0 GeV sigmas are as follows:
+
+| N_bkg | N_sig | Limit with &sigma;(sig) = 1 GeV | Limit with &sigma;(sig) = 1.6 GeV| L(1.6)-L(1.0))/L(1.0) | compare to &Sqrt;1.6/&Sqrt;1 - 1 |
+|-|-|-|-|-|-|
+| 100 | 2 | 3.10 | 3.70 | 0.194 | 0.265 |
+| 120 | 2 | 3.33 | 3.98 | 0.197 | 0.265 |
+| 140 | 2 | 3.52 | 4.27 | 0.213 | 0.265 |
+| 100 | 4 | 1.55 | 1.85 | 0.194 | 0.265 |
+| **120** | **4** | **1.66** | **1.99** | **0.197** | **0.265** |
+| 140 | 4 | 1.76 | 2.13 | 0.213 | 0.265 |
+| 100 | 6 | 1.04 | 1.24 | 0.196 | 0.265 |
+| 120 | 6 | 1.11 | 1.33 | 0.205 | 0.265 |
+| 140 | 6 | 1.18 | 1.42 | 0.206 | 0.265 |
+
+(in bold is the case closest to our analysis)
+
+Now, let's do a similar thing and instead of using Gasssian for the signal shape we use Double Sided
+Crystal Ball function with exactly the same parameters as in the analysis workspaces (before and after the bug fix):
+
+<img src="figs/fig_gen_CB_bug.png" width="400"/>
+<img src="figs/fig_gen_CB_fix.png" width="400"/>
+
+Here are the results:  
+
+| N_bkg | N_sig | Limit with bug, &sigma;(eff) = 1 GeV | Limit after fix &sigma;(eff) = 1.6 GeV| L(bug)-L(fix))/L(bug) | compare to &Sqrt;1.6/&Sqrt;1 - 1 |
+|-|-|-|-|-|-|
+| 100 | 2 | 3.27 | 3.77 | 0.153 | 0.265 |
+| 120 | 2 | 3.52 | 4.05 | 0.151 | 0.265 |
+| 140 | 2 | 3.73 | 4.33 | 0.159 | 0.265 |
+| 100 | 4 | 1.63 | 1.88 | 0.153 | 0.265 |
+| **120** | **4** | **1.76** | **2.02** | **0.151** | **0.265** |
+| 140 | 4 | 1.87 | 2.16 | 0.159 | 0.265 |
+| 100 | 6 | 1.09 | 1.26 | 0.158 | 0.265 |
+| 120 | 6 | 1.17 | 1.36 | 0.161 | 0.265 |
+| 140 | 6 | 1.24 | 1.44 | 0.164 | 0.265 |
+
+The 15% difference is what we also observe in the analysis. 
+
+Note: the width of 1.0 and 1.6 GeV are the _effective sigmas_ of the PDF. In the case of
+Crystal Ball function with large tails, a simple scaling with the &sigma;_eff breaks.
+
+
+
+
+
+
+
+
+
+
+
diff --git a/toyStudy/card_cut_n_count.txt b/toyStudy/card_cut_n_count.txt
@@ -0,0 +1,16 @@
+imax 1
+jmax 1
+kmax *
+
+bin bin1
+observation 5
+
+bin	    bin1	bin1
+process	    sig		bkg
+process	    0		1
+rate	    %SIG%	%BKG%
+
+
+lumi lnN    1.03	-
+sig_sys lnN   1.10	-
+bkg_sys lnN   -		1.10
diff --git a/toyStudy/card_shape_n_roll.txt b/toyStudy/card_shape_n_roll.txt
@@ -0,0 +1,24 @@
+imax 1
+jmax 1
+kmax *
+---------------
+shapes * * ws.root w:$PROCESS
+#shapes sig bin1 ws.root w:sig
+#shapes bkg bin1 ws.root w:bkg
+#shapes data_obs bin1 ws.root w:data_obs
+---------------
+bin bin1
+observation -1
+------------------------------
+bin          bin1       bin1
+process      sig        bkg
+process      0          1
+rate	    %SIG%	%BKG%
+--------------------------------
+
+lumi lnN	1.03	-
+sig_sys lnN   	1.10	-
+
+sigma   param   %SIGMA%      0.01
+
+nuisance edit freeze sigma
diff --git a/toyStudy/figs/fig_bkg_mgg.png b/toyStudy/figs/fig_bkg_mgg.png
diff --git a/toyStudy/figs/fig_cut_n_count.png b/toyStudy/figs/fig_cut_n_count.png
diff --git a/toyStudy/figs/fig_gen_CB_bug.png b/toyStudy/figs/fig_gen_CB_bug.png
diff --git a/toyStudy/figs/fig_gen_CB_fix.png b/toyStudy/figs/fig_gen_CB_fix.png
diff --git a/toyStudy/figs/fig_gen_bkg.png b/toyStudy/figs/fig_gen_bkg.png
diff --git a/toyStudy/figs/fig_scale_with_sigma.png b/toyStudy/figs/fig_scale_with_sigma.png
diff --git a/toyStudy/hhbbgg.inputbkg_13TeV.root b/toyStudy/hhbbgg.inputbkg_13TeV.root
diff --git a/toyStudy/hhbbgg.inputbkg_13TeV_bugged.root b/toyStudy/hhbbgg.inputbkg_13TeV_bugged.root
diff --git a/toyStudy/hhbbgg.mH125_13TeV.inputsig.root b/toyStudy/hhbbgg.mH125_13TeV.inputsig.root
diff --git a/toyStudy/hhbbgg.mH125_13TeV.inputsig_bugged.root b/toyStudy/hhbbgg.mH125_13TeV.inputsig_bugged.root
diff --git a/toyStudy/readWS.py b/toyStudy/readWS.py
@@ -0,0 +1,150 @@
+#! /usr/bin/env python
+
+#import os,sys
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import poisson 
+
+from ROOT import *
+gROOT.SetBatch()
+
+#
+# Read the workspaces:
+#
+
+# These are the workspaces with photon correction fixed:
+rooWsSig = TFile('hhbbgg.mH125_13TeV.inputsig.root')
+rooWsBkg = TFile('hhbbgg.inputbkg_13TeV.root')
+
+# These are bugged workspaces:
+# rooWsSig = TFile('hhbbgg.mH125_13TeV.inputsig_bugged.root')
+# rooWsBkg = TFile('hhbbgg.inputbkg_13TeV_bugged.root')
+
+
+sigWs = rooWsSig.Get('w_all')
+sigWs.Print()
+bkgWs = rooWsBkg.Get('w_all')
+bkgWs.Print()
+
+mgg = sigWs.var('mgg')
+mjj = sigWs.var('mjj')
+
+# 
+# Now, let's get the shape of the signal from the analysis workspace
+# These shapes are from High-Mass High-Purity category (the most sensitive one):
+
+anaSig_mgg  = sigWs.pdf("mggSig_cat0_CMS_sig_cat2")
+anaSig_mjj  = sigWs.pdf("mjjSig_cat0_CMS_sig_cat2")
+anaSig_prod = sigWs.pdf("CMS_sig_cat2") # This is a product of the two PDFs above
+sigDataSet = sigWs.data("Sig_cat2")
+
+# Printout the parameters of the PDFs:
+l0 = RooArgSet(mgg,mjj)
+pars = anaSig_prod.getParameters(l0)
+pars.Print()
+parsiter = pars.createIterator()
+var=parsiter.Next()
+while var!=None:
+    print '%s: %.3f  +/ %.3f'%(var.GetName(), var.getVal(), var.getError())
+    var=parsiter.Next()
+
+
+# Make some plots:
+
+c = TCanvas("c","c",0,0,900,600)
+c.cd()
+
+mgg.setRange('signal',118,135)
+mjj.setRange('signal', 70,190)
+
+mggFrame = mgg.frame(RooFit.Range("signal"))
+sigDataSet.plotOn(mggFrame, RooFit.Binning(80))
+anaSig_mgg.plotOn(mggFrame, RooFit.Name("Sig_mgg"), RooFit.LineColor(kRed+1), RooFit.LineWidth(2))
+mggFrame.Draw()
+c.SaveAs('tmpfig_sig_mgg.png')
+
+
+mjjFrame = mjj.frame(RooFit.Range("signal"))
+sigDataSet.plotOn(mjjFrame, RooFit.Binning(30))
+anaSig_mjj.plotOn(mjjFrame, RooFit.Name("Sig_mjj"), RooFit.LineColor(kGreen+1), RooFit.LineWidth(2))
+mjjFrame.Draw()
+c.SaveAs('tmpfig_sig_mjj.png')
+
+
+# 
+# Now, let's get the shape of the background from the workspace
+# 
+
+mgg = bkgWs.var('mgg')
+mjj = bkgWs.var('mjj')
+
+anaBkg_mgg = bkgWs.pdf("mggBkgTmpBer1_cat0_CMS_Bkg_cat2")
+anaBkg_mjj = bkgWs.pdf("mjjBkgTmpBer1_cat0_CMS_Bkg_cat2")
+anaBkg_mgg.Print()
+anaBkg_mjj.Print()
+
+realData = bkgWs.data("data_obs_cat2")
+
+
+print 'Some checks'
+#print anaBkg_mgg.getVal(), anaBkg_mgg.getVal(RooArgSet(mgg)), anaBkg_mgg.getNorm()
+#print anaBkg_mjj.getVal(), anaBkg_mjj.getVal(RooArgSet(mjj)), anaBkg_mjj.getNorm()
+
+mgg.setVal(125)
+mjj.setVal(125)
+
+print anaBkg_mgg.getVal(), anaBkg_mgg.getVal(RooArgSet(mgg)), anaBkg_mgg.getNorm()
+print anaBkg_mjj.getVal(), anaBkg_mjj.getVal(RooArgSet(mjj)), anaBkg_mjj.getNorm()
+# Results:
+# 9.1609403468 0.0162331093879 1.0
+# 6.4453044092 0.0072075547166 1.0
+
+
+# Evaluating the PDFs at 125 GeV
+N_data = realData.numEntries()
+# print N_data
+dNmgg = N_data*anaBkg_mgg.getVal(RooArgSet(mgg))
+dNmjj = N_data*anaBkg_mjj.getVal(RooArgSet(mjj))
+
+# Sigma_effectives times 2
+dMgg = 2*1.6
+dMjj = 2*18.3
+
+DeltaN = dNmgg*dNmjj*dMgg*dMjj
+
+print 'Bkg PDF at 125:'
+print '\t mgg=', dNmgg, 'mjj=', dNmjj, 'dN/{dm_gg*d_mjj} =', dNmgg*dNmjj
+print "DeltaN = ", DeltaN, ", sqrt(DeltaN) =", np.sqrt(DeltaN)
+
+# Results are:
+#         mgg= 1.91550690777 mjj= 0.850491456559 dN/{dm_gg*d_mjj} = 1.62912226004
+# DeltaN =  190.802799096 , sqrt(DeltaN) = 13.8131386403
+
+
+# Would expect this guy to be equal to dNmgg*dNmjj, but it isnt:
+# print "another check:", N_dataanaSig_prod.getVal(RooArgSet(mgg, mjj))
+
+
+# From Pasqualle:
+# UL = poisson_inv_cdf(mu=mu_bkg, p=95%)
+for mu in [dNmgg*dMgg, dNmjj*dMjj, dNmgg*dNmjj*dMgg*dMjj]:
+    UL = poisson.ppf(0.95, mu)
+    print "UL from Poisson ppf = ", UL, 'for mu =', mu 
+
+
+# Make a plot of backgrounds as well:
+fakeData = anaBkg_mgg.generate(RooArgSet(mgg), 120)
+
+mggFrame = mgg.frame()
+realData.plotOn(mggFrame, RooFit.Binning(80), RooFit.Name('data1'))
+#fakeData.plotOn(mggFrame, RooFit.Binning(80), RooFit.Name('data1'))
+anaBkg_mgg.plotOn(mggFrame, RooFit.Name("Bkg_mgg"), RooFit.LineColor(kBlue), RooFit.LineWidth(2), RooFit.FillColor(kCyan-6))
+mggFrame.Draw()
+
+#func = mggFrame.findObject("Bkg_mgg")
+#print "Eval from func =", func.Eval(125)
+
+c.SaveAs('tmpfig_bkg.png')
+
+
+