Merge pull request cms-tau-pog#5 from oponcet/PODAS23

update readme for combine part, fix some instructions and add a simpl…
oponcet · Oct 13, 2023 · f5271a2 · f5271a2
2 parents 47d4c0d + 9e76948
commit f5271a2
Show file tree

Hide file tree

Showing 4 changed files with 230 additions and 15 deletions.
diff --git a/Fitter/TauES_ID/config/Default_FitSetupTES_mutau_DM.yml b/Fitter/TauES_ID/config/Default_FitSetupTES_mutau_DM.yml
@@ -1,18 +1,19 @@
-## Config file for default TES and tid SF fit in mutau channel by DM 
-## contact: [email protected] and [email protected]
+#
+## Config file for default TES fit in mutau channel
+## contact: [email protected]
 ##
 
 # NOTE: In general, the main information (channel, baselineCuts, etc.) should be given.
-# More specific sub-options (like weight replacement for systematic uncertainties) are optional
+#       More specific sub-options (like weight replacement for systematic uncertainties) are optional
 
 
-# Typically, each channel have its own config file; combination can be done at datacard level
+# Typically, each channel will have its own config file; combination can be done at datacard level
 
 channel: mutau
 
 # Tag can be used to differentiate between scenarios
 
-tag: "_mutau_mt65_DM_Dt2p5_default"
+tag: "_mutau_mt65_DM_Dt2p5_DAS23_VSJetMedium"
 
 
 baselineCuts: "q_1*q_2<0 && iso_1<0.15 && idDecayModeNewDMs_2 && idDeepTau2018v2p5VSjet_2>=5 && idDeepTau2018v2p5VSe_2>=2 && idDeepTau2018v2p5VSmu_2>=4 && !lepton_vetoes_notau && metfilter "
@@ -44,6 +45,8 @@ regions:
         title: "h^{#pm}h^{#mp}h^{#pm}#pi^{0}"
 
 plottingOrder: ["DM11", "DM10","DM1", "DM0"]
+#plottingOrder: ["DM11"]
+
 
 tesRegions:
     DM0: 
@@ -77,12 +80,16 @@ observables:
             cut: "50<m_vis && m_vis<150"
         fitRegions: [DM0, DM1, DM10, DM11]
         scanRegions: ["DM0", "DM1", "DM10", "DM11"] 
-
+        # fitRegions: ["DM11"]
+        # scanRegions: ["DM11"] 
 
 # Assume that this code is meant to fit TES variations; more options could be added when adding other POIs
 
 TESvariations: 
-    values: [0.970, 0.971, 0.972, 0.973, 0.974, 0.975, 0.976, 0.977, 0.978, 0.979, 0.980, 0.981, 0.982, 0.983, 0.984, 0.985, 0.986, 0.987, 0.988, 0.989, 0.990, 0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.997, 0.998, 0.999, 1.000, 1.001, 1.002, 1.003, 1.004, 1.005, 1.006, 1.007, 1.008, 1.009, 1.010, 1.011, 1.012, 1.013, 1.014, 1.015, 1.016, 1.017, 1.018, 1.019, 1.020, 1.021, 1.022, 1.023, 1.024, 1.025, 1.026, 1.027, 1.028, 1.029, 1.030]
+    #values: [0.970, 0.971, 0.972, 0.973, 0.974, 0.975, 0.976, 0.977, 0.978, 0.979, 0.980, 0.981, 0.982, 0.983, 0.984, 0.985, 0.986, 0.987, 0.988, 0.989, 0.990, 0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.997, 0.998, 0.999, 1.000, 1.001, 1.002, 1.003, 1.004, 1.005, 1.006, 1.007, 1.008, 1.009, 1.010, 1.011, 1.012, 1.013, 1.014, 1.015, 1.016, 1.017, 1.018, 1.019, 1.020, 1.021, 1.022, 1.023, 1.024, 1.025, 1.026, 1.027, 1.028, 1.029, 1.030]
+    #values: [0.970, 0.972, 0.974, 0.976, 0.978, 0.980, 0.982, 0.984, 0.986, 0.988, 0.990, 0.992, 0.994, 0.996, 0.998, 1.000, 1.002, 1.004, 1.006, 1.008, 1.010, 1.012, 1.014, 1.016, 1.018, 1.020, 1.022,1.024, 1.026, 1.028, 1.030] #1.024
+    values: [0.970, 0.980, 0.990, 1.000, 1.010, 1.020, 1.030]
+
     processes: ["ZTT"]
 
 # Should add bin-by-bin variations (stat. uncertainties) for signal and backgrounds?
@@ -102,10 +109,12 @@ samples:
     split:
         DY: [ ["ZTT","genmatch_2==5"], ["ZL","genmatch_2>0 && genmatch_2<5"], ["ZJ","genmatch_2==0"] ]
         TT: [["TTT","genmatch_2==5"], ["TTL","genmatch_2>0 && genmatch_2<5"], ["TTJ","genmatch_2==0"] ]
+        #ST: [("STT","genmatch_2==5"),("STJ","genmatch_2<5")]
     rename:
         WJ: "W"
     data: "data_obs"
     removeSFs: ["idweight_2"]
+    #addSFs: []
 
 
 # # Processes taken into account in analysis
@@ -153,10 +162,10 @@ systematics:
         effect: "lnN"
         processes: ["ZTT", "ZL", "ZJ", "TTT", "TTL", "TTJ", "W", "ST", "VV"]
         scaleFactor: 1.02
-    # xsec_dy:
-    #     effect: "lnN"
-    #     processes: ["ZTT", "ZL", "ZJ"]
-    #     scaleFactor: 1.02
+    xsec_dy:
+        effect: "lnN"
+        processes: ["ZTT", "ZL", "ZJ"]
+        scaleFactor: 1.02
     xsec_tt:
         effect: "lnN"
         processes: ["TTT", "TTL", "TTJ"]
@@ -185,7 +194,14 @@ systematics:
 # Additional scale factors per year for specific processes (correction of xsec, reco SFs, etc.)
 
 # scaleFactors:
+#     WNFs:
+#         processes: ["W"]
+#         values:
+#             'UL2016_preVFP': { 'dm_2==0': 0.939,  'dm_2==1': 1.031,  'dm_2==10': 1.065,  'dm_2==11': 1.021  }
+#             'UL2016_postVFP': { 'dm_2==0': 0.919,  'dm_2==1': 0.972,  'dm_2==10': 1.112,  'dm_2==11': 1.074  }
+#             'UL2017': { 'dm_2==0': 0.971,  'dm_2==1': 0.995,  'dm_2==10': 1.109,  'dm_2==11': 1.075  }
+#             'UL2018': { 'dm_2==0': 1.007,  'dm_2==1': 0.968,  'dm_2==10': 1.079,  'dm_2==11': 1.044  }
 #     idFs:    
 #         processes: ["ZTT"]
 #         values:
-#             'UL2018': { 'dm_2==0 && genmatch_2==5': 1,  'dm_2==1 && genmatch_2==5': 1 ,  'dm_2==10 && genmatch_2==5': 1,  ' dm_2==11 && genmatch_2==5': 1 }
+#             'UL2018': { 'dm_2==0 && genmatch_2==5': 0.8667,  'dm_2==1 && genmatch_2==5': 0.8333 ,  'dm_2==10 && genmatch_2==5': 0.8000,  ' dm_2==11 && genmatch_2==5': 0.7667 }
diff --git a/Fitter/TauES_ID/makefit_PODAS23.py b/Fitter/TauES_ID/makefit_PODAS23.py
@@ -0,0 +1,121 @@
+#! /usr/bin/env python
+"""
+Date : October 2023 
+Author : @oponcet 
+Description :
+ - Scan of the tes and/or tid SF, which is implemented as a rateParameter.
+"""
+
+from distutils import filelist
+from distutils.command.config import config
+import sys
+import os
+import yaml
+from argparse import ArgumentParser
+
+# Generating the datacards for mutau channel
+def generate_datacards_mutau(era, config, extratag):
+    print(' >>>>>> Generating datacards for mutau channel')
+    os.system("./TauES_ID/harvestDatacards_TES_idSF_MCStat.py -y %s -c %s -e %s "%(era,config,extratag)) 
+
+
+# Function to run the fit     
+def run_combined_fit(setup, **kwargs):
+    #tes_range    = kwargs.get('tes_range',    "0.970,1.030")
+    tes_range    = kwargs.get('tes_range',    "%s,%s" %(min(setup["TESvariations"]["values"]), max(setup["TESvariations"]["values"]))                         )
+    tid_SF_range = kwargs.get('tid_SF_range', "0.7,1.3")
+    extratag     = kwargs.get('extratag',     "_DeepTau")
+    algo         = kwargs.get('algo',         "--algo=grid --alignEdges=1 --saveFitResult ")
+    npts_fit     = kwargs.get('npts_fit',     "--points=7")
+    fit_opts     = kwargs.get('fit_opts',     "--robustFit=1 --setRobustFitAlgo=Minuit2 --setRobustFitStrategy=2 --setRobustFitTolerance=0.001 %s" %(npts_fit))
+    xrtd_opts    = kwargs.get('xrtd_opts',    "--X-rtd FITTER_NEW_CROSSING_ALGO --X-rtd FITTER_NE")
+    cmin_opts    = kwargs.get('cmin_opts',    "--cminFallbackAlgo Minuit2,Migrad,0:0.0001 --cminPreScan"                                                 )
+    save_opts    = kwargs.get('save_opts',    "--saveNLL --saveSpecifiedNuis all "                                                                           )
+    era          = kwargs.get('era',          "")
+    workspace = ""
+
+
+
+    # Variable of the fit (usually mvis)
+    variable = "m_vis"
+    ## For each region defined in scanRegions in the config file 
+    for r in setup["observables"]["m_vis"]["scanRegions"]:
+        print("Region : "+r)
+
+        # Binelabel for output file of the fit
+        BINLABELoutput = "mt_"+variable+"-"+r+setup["tag"]+extratag+"-"+era+"-13TeV"
+
+        # For fit by region create the datacards and the workspace here
+        datacardfile = "ztt_mt_m_vis-"+r+setup["tag"]+extratag+"-"+era+"-13TeV"
+        print("datacard file for fit by region = %s" %(datacardfile)) 
+        # Create workspace 
+        os.system("text2workspace.py output_%s/%s.txt" %(era, datacardfile))
+        workspace = "output_%s/%s.root" %(era, datacardfile)
+        print("Datacard workspace has been created")
+
+        ## FIT ##
+
+        # Fit of the POI for each region defined in the config file 
+        # POI = "tes_%s" % (r) # Change your POI here : tes_ or tid_SF_
+        # NP = "rgx{.*tid.*}"
+        POI = "tid_SF_%s" % (r) # Change your POI here : tes_ or tid_SF_
+        NP = "rgx{.*tes.*}"
+        print(">>>>>>> "+POI+" fit")
+        # you can fit tes_ or tid_SF_ (which is not you POI) if you don't want to include it in the fit.
+        # Else it will be combined fit of tes and tid_SF
+        POI_OPTS = "-P %s --redefineSignalPOIs %s --setParameterRanges %s=%s -m 90 --setParameters r=1,tes_%s=1,tid_SF_%s=1 --freezeParameters r " % (POI, POI, POI, tes_range, r, r)  # tes_DM
+        MultiDimFit_opts = " %s %s %s -n .%s %s %s %s %s --trackParameters %s,rgx{.**.},rgx{.*sf_W_*.}" %(workspace, algo, POI_OPTS, BINLABELoutput, fit_opts, xrtd_opts, cmin_opts, save_opts,NP)
+        # Fit with combine
+        os.system("combine -M MultiDimFit  %s" %(MultiDimFit_opts))
+
+
+    os.system("mv higgsCombine*root output_%s"%era)
+
+# Plot the scan using output file of combined 
+def plotScan(setup, **kwargs):
+    tid_SF_range = kwargs.get('tid_SF_range', "0.7,1.3")
+    extratag     = kwargs.get('extratag',     "_DeepTau")
+    era          = kwargs.get('era',          ""        )
+    config       = kwargs.get('config',       ""        )
+
+    # Plot 
+    # do not forget to change the POI and the range
+    os.system("./TauES_ID/plotParabola_POI_region.py -p tid_SF -y %s -e %s -r %s,%s -s -a -c %s" % (era, extratag, 0.7, 1.3, config))
+    #os.system("./TauES_ID/plotPostFitScan_POI.py --poi tes -y %s -e %s -r %s,%s -c %s" %(era,extratag,min(setup["TESvariations"]["values"]),max(setup["TESvariations"]["values"]), config))
+
+
+
+### main function
+def main(args):
+
+    era    = args.era
+    config = args.config
+    extratag     = "_DeepTau"
+
+
+    print("Using configuration file: %s"%(args.config))
+    with open(args.config, 'r') as file:
+        setup = yaml.safe_load(file)
+
+    # Generating the datacards for mutau channel
+    generate_datacards_mutau(era=era, config=config,extratag=extratag)
+
+    # Run the fit using combine
+    run_combined_fit(setup, era=era, config=config)
+
+    # Plots 
+    plotScan(setup, era=era, config=config)
+
+
+###
+if __name__ == '__main__':
+
+    argv = sys.argv
+    parser = ArgumentParser(prog="makeTESfit", description="execute all steps to run TES fit")
+    parser.add_argument('-y', '--era', dest='era', choices=['2016', '2017', '2018', 'UL2016_preVFP','UL2016_postVFP', 'UL2017', 'UL2018','UL2018_v10'], default=['UL2018'], action='store', help="set era")
+    parser.add_argument('-c', '--config', dest='config', type=str, default='TauES_ID/config/defaultFitSetupTES_mutau.yml', action='store', help="set config file containing sample & fit setup")
+    args = parser.parse_args()
+
+    main(args)
+    print(">>>\n>>> done\n")
+
diff --git a/...input/ztt_mt_tes_m_vis.inputs-UL2018_v10-13TeV_mutau_mt65_DM_Dt2p5_DAS23_VSJetMedium.root b/...input/ztt_mt_tes_m_vis.inputs-UL2018_v10-13TeV_mutau_mt65_DM_Dt2p5_DAS23_VSJetMedium.root
diff --git a/docs/CMSPODAS23/main.md b/docs/CMSPODAS23/main.md
@@ -31,7 +31,7 @@ cmsrel $CMSSW
 cd $CMSSW/src
 cmsenv
 cd $CMSSW_BASE/src/
-git clone https://github.com/cardinia/TauFW.git TauFW -b PODAS
+git clone https://github.com/cardinia/TauFW.git TauFW -b PODAS23
 scram b -j4
 cd $CMSSW_BASE/src/
 git clone https://github.com/cms-nanoAOD/nanoAOD-tools.git PhysicsTools/NanoAODTools
@@ -49,8 +49,10 @@ git fetch origin
 git checkout v9.1.0
 cd $CMSSW_BASE/src
 git clone https://github.com/cms-analysis/CombineHarvester.git CombineHarvester
-scramv1 b clean; scramv1 b
+cd CombineHarvester
 git checkout v2.0.0
+scramv1 b clean; scramv1 b
+
 
 ```
 
@@ -67,4 +69,80 @@ For now navigate the tree stored and try to plot the visible mass of the muon+ta
 + DeepTau vsMu
 + Transverse mass
 
-Change the selection to observe the effect of the different DeepTau working points on the distribution.
+Change the selection to observe the effect of the different DeepTau working points on the distribution.
+
+
+## Combine
+
+## Creating inputs
+Inputs are ROOT files used for the creation of the datacards; they are produced by `Fitter/TauES/createinputsTES.py`.
+These root files are saved in the `Fitter/input` folder and named `ztt*.input*tag*.root`. They contain one TDirectory for each of the `"regions"` defined in the config file (.yml). For each region, there is a list of TH1D corresponding to each `"process"` defined in the config file (ex: ZTT). For each shape systematic, there are also two additional TH1D corresponding to the Up and Down variations of the process (ex: ZTT_shapedy_Up). For the TES there is a list of additional TH1D corresponding to the variations (defined by `"TESvariations"` in the config file) of the process by TES correction. 
+
+You can create your own inputs by adding some cuts on several variables in the config files.
+
+As an example, the config file `Fitter/TauES_ID/config/Default_FitSetupTES_mutau_DM.yml` was used to produce the inputs in `Fitter/input/ztt_mt_tes_m_vis.inputs-UL2018_v10-13TeV_mutau_mt65_DM_Dt2p5_DAS23_VSJetMedium.root`. You can use them directly to run combine.
+
+:warning: Don't modify this file; create another one based on this example and change the tag so as not to overwrite the existing file.
+
+
+:computer: Example of command :
+
+ ```sh
+  python TauES/createinputsTES.py -y UL2018_v10 -c TauES_ID/config/Default_FitSetupTES_mutau_DM.yml
+  ```
+
+
+## Config file 
+This section provides an overview and explanation of the configuration file for the default tes and tid SF fit in the mutau channel. The config file contains various settings and parameters used in the analysis. For further details and explanations of each parameter, please refer to the specific sections within the config file itself.
+
+The main information such as the channel, baseline cuts, and tag are provided at the beginning of the config file. Additional sub-options, like weight replacement for systematic uncertainties, are optional.
+- `"channel"`: Specifies the channel for the analysis, which is "mutau" or "mumu" in this case.
+- `"tag"`: Allows differentiation between different scenarios or versions.
+- `"baselineCuts"`: Defines the baseline selection cuts for events. It includes various criteria for event selection such as charge correlation, isolation, identification, and additional requirements like lepton vetoes and met filters.
+- `"regions"`: Defines different regions of interest in the analysis. Each region has a specific definition (using cuts or conditions) and a title for identification purposes. One datacard file is created for each region.
+- `"plottingOrder"`: Determines the order in which the defined regions will be plotted. Used by plotParabola_POI.py to make the summary plot of the POI measurements.
+- `"tesRegions"`: Specifies the TES (Tau Energy Scale) regions for the scans. Titles are defined.
+- `"tid_SFRegions"`: Specifies the TID (Tau ID) scale factor regions for the scans. Titles are defined.
+- `"observables"`: Defines the observables to be fitted and plotted in the analysis. Each observable has its own binning and title.
+  - `"fitRegions"`: for each observable, fit regions are defined.
+  - `"scanRegions"`: for each observable, the region to scan the poi. 
+- `"TESvariations"`: Specifies the different TES variations considered in the analysis. It includes a list of TES values for which the analysis will be performed.
+- `"fitSpecs"`: Defines specifications for bin-by-bin (BBB) systematics. It specifies whether to perform BBB systematics for both signal and background samples.
+- `"samples"` : Specifies the samples to be used in the analysis and their association with different processes. It includes information about the file name format, sample joining, sample splitting, renaming, and removing or adding specific weights or scale factors.
+- `"processes"` : Lists the processes taken into account in the fit.
+- `"systematics"` : Each systematic uncertainty has an effect type (shape or lnN), associated processes, and a scaling factor if applicable.
+- `"scaleFactors"`: Provides additional scale factors per year for specific processes. These scale factors can correct for cross-sections, reconstruction scale factors, and other factors.
+
+## Running the fit :
+
+### Description of the main script `makefit_PODAS23.py`: 
+
+The script `makefit_PODAS23.py` provides functionality for generating datacards and performing fits in the mutau channel.
+
+The fit is done using [Combine tool](https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/). See the documentation to change the parameter of the fit. 
+
+You can change the Parameter of Interest (POI) in the code to scan either the TES or the ID SF. 
+
+### Preparing the datacards
+
+Datacards are directly generated in `makefit_PODAS23.py`:
+
+`generate_datacards_mutau(era, config, extratag)`: function that generates datacards for the mutau channel. This function calls `harvestDatacards_TES_idSF_MCStat.py`.
+
+
+The input to Combine tool is a datacards file (ztt root and txt files). The datacards are generated for each `"region"` defined in the config file.
+It defines the following information :
+- The tes is defined as a POI for each `"tesRegions"` defined in the config file. Horizontal morphing is used to interpolate between the templates generated for each `"TESvariations"` (defined in config file). 
+- The tid SF is defined as rateParameter for each `"tid_SFRegions"` defined in the config file.
+- If "norm_wj" is not specified in config file, a rateParameter "sf_W" is defined for the W+Jet normalisation. 
+- The `autoMCstat` function is used to have bin-by-bin uncertainties for the sum of all backgrounds.
+
+### Plotting results
+
+The results of the fit are saved in a root file (ex: `higgsCombine*root` in the output folder) that can be used to produce several plots. In particular, NLL parabola and summary plots can be produced via `plotScan(setup, era=era, config=config)`, which calls `plotParabola_POI_region.py`.
+
+:computer: Example of command to run the TES scans by DM : 
+```sh
+python TauES_ID/makefit_PODAS23.py -y UL2018_v10 -c TauES_ID/config/Default_FitSetupTES_mutau_DM.yml
+ ```
+The TES NLL parabolae and summary plots are automatically generated.