From bec6c99e08143bdb83cb354e7950d537a9fba7d2 Mon Sep 17 00:00:00 2001 From: Andrey Pozdnyakov Date: Fri, 28 Oct 2022 12:49:06 +0200 Subject: [PATCH] low-pt selection for MiNNLO --- cofeGeno.py | 135 +++++++++++++++++++++++------------------- mc_vjets_samples.info | 4 +- mc_vjets_samples.list | 10 ++-- 3 files changed, 84 insertions(+), 65 deletions(-) diff --git a/cofeGeno.py b/cofeGeno.py index b046fb4..82f1a61 100755 --- a/cofeGeno.py +++ b/cofeGeno.py @@ -14,6 +14,8 @@ from Coffea_NanoGEN_schema import NanoGENSchema import sampleInfo as si +scaleout=200 + def isClean(obj_A, obj_B, drmin=0.4): # From: https://github.com/oshadura/topcoffea/blob/master/topcoffea/modules/objects.py objB_near, objB_DR = obj_A.nearest(obj_B, return_metric=True) @@ -72,9 +74,10 @@ def process(self, events): particles = events.GenPart #leptons = particles[ (np.abs(particles.pdgId) == 13) & (particles.status == 1) & (np.abs(particles.eta)<2.5) ] - leptons = particles[ ((np.abs(particles.pdgId) == 11) | (np.abs(particles.pdgId) == 13) ) & + leptons = particles[ ((np.abs(particles.pdgId) == 11) | (np.abs(particles.pdgId) == 13) ) & + ak.fill_none( (np.abs(particles.parent.pdgId) != 15), True) & (particles.status == 1) & (np.abs(particles.eta)<2.5) & (particles.pt>20) ] - + genjets = events.GenJet jets25 = genjets[ (np.abs(genjets.eta) < 2.5) & (genjets.pt > 25) ] @@ -83,7 +86,7 @@ def process(self, events): (np.abs(LHEP.pdgId) == 4) | (np.abs(LHEP.pdgId) == 5) | (np.abs(LHEP.pdgId) == 21 ) ) & (LHEP.status==1) ] LHE_Njets = ak.num(LHEjets) - + if dataset in ['DYJets_inc_FXFX','DYJets_MiNNLO_Mu_Supp']: weight_nosel = events.genWeight @@ -108,7 +111,7 @@ def process(self, events): pt25 = ((dileptons['i0'].pt > 25) | (dileptons['i1'].pt > 25)) Zmass_cut = (((dileptons['i0'] + dileptons['i1']).mass - 91.19) < 15) - Vpt_cut = ( (dileptons['i0'] + dileptons['i1']).pt > 100 ) + Vpt_cut = ( (dileptons['i0'] + dileptons['i1']).pt > 10 ) dileptonMask = pt25 & Zmass_cut & Vpt_cut good_dileptons = dileptons[dileptonMask] @@ -134,52 +137,62 @@ def process(self, events): good_jets = jets25[j_isclean] two_jets = (ak.num(good_jets) >= 2) - output['njet25'].fill(dataset=dataset, njet25=ak.num(good_jets), weight=weight_nosel) - LHE_Njets_cut = (LHE_Njets>=0) - full_selection = two_lep & two_jets & LHE_vpt_cut & LHE_Njets_cut + #LHE_Njets_cut = (LHE_Njets>=0) + selection_2l = two_lep + selection_2l2j = two_lep & two_jets & LHE_vpt_cut #full_selection = two_lep & two_jets & Vpt_cut #full_selection = two_lep & two_jets & LHE_vpt_cut & vmass_cut #full_selection = two_lep & two_jets & vpt_cut & vmass_cut - selected_events = events[full_selection] - output['cutflow'][dataset]["selected_events"] += len(selected_events) + events_2l = events[selection_2l] + events_2l2j = events[selection_2l2j] - dijets = good_jets[full_selection] - dijet = dijets[:, 0] + dijets[:, 1] - - dijet_pt = dijet.pt - dijet_m = dijet.mass - dijet_dr = dijets[:, 0].delta_r(dijets[:, 1]) + output['cutflow'][dataset]["events_2l"] += len(events_2l) + output['cutflow'][dataset]["events_2l2j"] += len(events_2l2j) if dataset in ['DYJets_inc_FXFX','DYJets_MiNNLO_Mu_Supp']: - weight = selected_events.genWeight + weight_full = events_2l2j.genWeight + weight_2l = events_2l.genWeight else: - weight = np.sign(selected_events.genWeight) - #weight = np.ones(len(selected_events)) - weight2 = np.repeat(np.array(weight),2) + weight_full = np.sign(events_2l2j.genWeight) + weight_2l = np.sign(events_2l.genWeight) + #weight = np.ones(len(events_2l2j)) + weight2_full = np.repeat(np.array(weight_full),2) + weight2_2l = np.repeat(np.array(weight_2l),2) #print("weight length:", len(weight), len(weight2)) #print(leptons.eta[full_selection][:,0:2]) - output['dilep_m'].fill(dataset=dataset, dilep_m=ak.flatten(vmass[full_selection]), weight=weight) - output['dilep_pt'].fill(dataset=dataset, dilep_pt=ak.flatten(vpt[full_selection]), weight=weight) - output['lep_eta'].fill(dataset=dataset, lep_eta=ak.flatten(leptons.eta[full_selection][:,0:2]), weight=weight2) - output['lep_pt'].fill(dataset=dataset, lep_pt=ak.flatten(leptons.pt[full_selection][:,0:2]), weight=weight2) + output['njet25'].fill(dataset=dataset, njet25=ak.num(good_jets[selection_2l]), weight=weight_2l) - output['jet_eta'].fill(dataset=dataset, jet_eta=ak.flatten(good_jets.eta[full_selection][:,0:2]), weight=weight2) - output['jet_pt'].fill(dataset=dataset, jet_pt=ak.flatten(good_jets.pt[full_selection][:,0:2]), weight=weight2) + dijets = good_jets[selection_2l2j] + dijet = dijets[:, 0] + dijets[:, 1] - output['dijet_dr'].fill(dataset=dataset, dijet_dr=dijet_dr, weight=weight) - output['dijet_m'].fill(dataset=dataset, dijet_m=dijet_m, weight=weight) - output['dijet_pt'].fill(dataset=dataset, dijet_pt=dijet_pt, weight=weight) + dijet_pt = dijet.pt + dijet_m = dijet.mass + dijet_dr = dijets[:, 0].delta_r(dijets[:, 1]) + + + output['dilep_m'].fill(dataset=dataset, dilep_m=ak.flatten(vmass[selection_2l2j]), weight=weight_full) + output['dilep_pt'].fill(dataset=dataset, dilep_pt=ak.flatten(vpt[selection_2l2j]), weight=weight_full) + + output['lep_eta'].fill(dataset=dataset, lep_eta=ak.flatten(leptons.eta[selection_2l2j][:,0:2]), weight=weight2_full) + output['lep_pt'].fill(dataset=dataset, lep_pt=ak.flatten(leptons.pt[selection_2l2j][:,0:2]), weight=weight2_full) + + output['jet_eta'].fill(dataset=dataset, jet_eta=ak.flatten(good_jets.eta[selection_2l2j][:,0:2]), weight=weight2_full) + output['jet_pt'].fill(dataset=dataset, jet_pt=ak.flatten(good_jets.pt[selection_2l2j][:,0:2]), weight=weight2_full) + + output['dijet_dr'].fill(dataset=dataset, dijet_dr=dijet_dr, weight=weight_full) + output['dijet_m'].fill(dataset=dataset, dijet_m=dijet_m, weight=weight_full) + output['dijet_pt'].fill(dataset=dataset, dijet_pt=dijet_pt, weight=weight_full) #print("Negative DRs:", dijet_dr[weight<0]) #print("Negative wei:", weight[weight<0]) - neg_wei = np.abs(weight[weight<0]) - neg_wei_dr = dijet_dr[weight<0] + neg_wei = np.abs(weight_full[weight_full<0]) + neg_wei_dr = dijet_dr[weight_full<0] output['dijet_dr_neg'].fill(dataset=dataset, dijet_dr=neg_wei_dr, weight=neg_wei) return output @@ -202,7 +215,7 @@ def postprocess(self, accumulator): } if self.verblvl>0: print("weights = ", weights) - + for key in accumulator: if key not in ['cutflow','sumw']: accumulator[key].scale(weights, axis='dataset') @@ -211,11 +224,11 @@ def postprocess(self, accumulator): '2017_DY 1+2j': ['2017_DY1J', '2017_DY2J'], }) elif self.proc_type=="ul": - sampleInfo = si.ReadSampleInfoFile('mc_vjets_samples.info') + sampleInfo = si.ReadSampleInfoFile('mc_vjets_samples.info') weights = {sname : lumi*sampleInfo[sname]['xsec']*sampleInfo[sname]['kfac']/accumulator['sumw'][sname] for sname in accumulator['sumw'].keys()} if self.verblvl>0: print("weights = ", weights) - + for key in accumulator: if key not in ['cutflow','sumw']: accumulator[key].scale(weights, axis='dataset') @@ -230,7 +243,7 @@ def postprocess(self, accumulator): 'DYJets_HERWIG': ['DYJets_HERWIG'], }) - + return accumulator @@ -296,7 +309,7 @@ def plot(histograms, outdir, proc_type, fromPickles=False): fig, (ax, rax) = plt.subplots(nrows=2, ncols=1, figsize=(7,7), gridspec_kw={"height_ratios": (2, 1)},sharex=True) fig.subplots_adjust(hspace=.07) - + hist.plot1d(histogram, overlay='ds_scaled', ax=ax, line_opts={"color":['C2','C1','C0']}, overflow='none') ax.set_ylim(0, None) if obs_axis in ['LHE_HT','wei']: @@ -318,12 +331,10 @@ def plot(histograms, outdir, proc_type, fromPickles=False): denom = histogram[samp2[0]].project(obs_axis), error_opts={'color': 'c', 'marker': 'o'}, ax=rax, - denom_fill_opts={}, - guide_opts={}, unc='num', label=samp1[1]+"/"+samp2[1] ) - + hist.plotratio(num = histogram[samp1[0]].project(obs_axis), denom = histogram[samp3[0]].project(obs_axis), error_opts={'color': 'brown', 'marker': 'v'}, @@ -337,6 +348,8 @@ def plot(histograms, outdir, proc_type, fromPickles=False): denom = histogram[samp3[0]].project(obs_axis), error_opts={'color': 'm', 'marker': '>'}, ax=rax, + denom_fill_opts={}, + guide_opts={}, clear = False, label=samp2[1]+"/"+samp3[1], unc='num' @@ -345,21 +358,21 @@ def plot(histograms, outdir, proc_type, fromPickles=False): rax.set_ylabel('Ratios') rax.set_ylim(0.6,1.6) - + else: - + #hist.plot1d(histogram, overlay='dataset', line_opts={}, overflow='none') #plt.gca().autoscale() - - + + fig, (ax, rax) = plt.subplots(nrows=2, ncols=1, figsize=(7,7), gridspec_kw={"height_ratios": (3, 1)},sharex=True) fig.subplots_adjust(hspace=.07) - + hist.plot1d(histogram, overlay='ds_scaled', ax=ax, line_opts={}, overflow='none') ax.set_ylim(0, None) - + leg = ax.legend() print(histogram["2016_DY 1+2j"].axes()) hist.plotratio(num = histogram["2017_DY 1+2j"].project(obs_axis), @@ -370,10 +383,10 @@ def plot(histograms, outdir, proc_type, fromPickles=False): guide_opts={}, unc='num' ) - + rax.set_ylabel('2017/2016 Ratio') rax.set_ylim(0.5,1.5) - + else: print("axes= ", axes) print("This should not happen. I'm not sure what to do.") @@ -458,18 +471,18 @@ def main(): file_list = { sname: si.makeListOfInputRootFilesForProcess(sname, sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles, checkOpen=False) for sname in sampleInfo } - + #file_list['DYJets_MiNNLO_Mu_Supp'] = si.makeListOfInputRootFilesForProcess("DYJets_MiNNLO_Mu_Supp", sampleInfo, pkl_file, xroot, lim=20, checkOpen=True) #file_list = {'DYJets_HERWIG': [#'~/work/DYToLL_NLO_5FS_TuneCH3_13TeV_matchbox_herwig7_cff_py_GEN_NANOGEN.root', # '~/work/DYToLL_NLO_5FS_TuneCH3_13TeV_matchbox_herwig7_cff_py_GEN_NANOGEN_inNANOAODGEN.root']} - + ''' file_list = { 'DYJets_inc_MLM': si.makeListOfInputRootFilesForProcess("DYJets_inc_MLM", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), 'DYJets_inc_FXFX': si.makeListOfInputRootFilesForProcess("DYJets_inc_FXFX", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), 'DYJets_inc_MiNNLO_Mu': si.makeListOfInputRootFilesForProcess("DYJets_inc_MiNNLO_Mu", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), 'DYJets_inc_MiNNLO_El': si.makeListOfInputRootFilesForProcess("DYJets_inc_MiNNLO_El", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), - + 'DYJets_0J': si.makeListOfInputRootFilesForProcess("DYJets_0J", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), 'DYJets_1J': si.makeListOfInputRootFilesForProcess("DYJets_1J", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), 'DYJets_2J': si.makeListOfInputRootFilesForProcess("DYJets_2J", sampleInfo, pkl_file, xroot, lim=opt.numberOfFiles), @@ -486,7 +499,7 @@ def main(): from dask_jobqueue import HTCondorCluster cluster = HTCondorCluster(cores=24, memory="4GB", disk="4GB") cluster.scale(jobs=10) # ask for 10 jobs - + from dask.distributed import Client #client = Client(n_workers=4, threads_per_worker=2) client = Client(cluster) @@ -500,7 +513,7 @@ def main(): ) elif opt.executor=="parsl": - + try: from os import popen, environ, getcwd @@ -511,7 +524,7 @@ def main(): print(_x509_localpath) _x509_path = environ['HOME'] + f'/.{_x509_localpath.split("/")[-1]}' system(f'cp {_x509_localpath} {_x509_path}') - + env_extra = [ 'export XRD_RUNFORKHANDLER=1', f'export X509_USER_PROXY={_x509_path}', @@ -519,23 +532,25 @@ def main(): f"export PYTHONPATH=$PYTHONPATH:{getcwd()}", ] condor_extra = [ - 'source ~/work/vjets/conda_setup.sh', + f'echo {getcwd()}', + f'ls {getcwd()}', + f'source {getcwd()}/CondaSetup.sh', 'conda activate coffea37', 'echo LETSGO' ] - + import parsl from parsl.config import Config from parsl.executors import HighThroughputExecutor from parsl.providers import CondorProvider - from parsl.addresses import address_by_hostname, address_by_query - + from parsl.addresses import address_by_hostname, address_by_query + # For local executor #from parsl.app.app import python_app, bash_app #from parsl.configs.local_threads import config #parsl.load(config) - + htex_config = Config( executors=[ HighThroughputExecutor( @@ -544,8 +559,8 @@ def main(): max_workers=1, provider=CondorProvider( nodes_per_block=1, - init_blocks=20, - max_blocks=600, + init_blocks=scaleout, + max_blocks=scaleout+10, scheduler_options='should_transfer_files = YES\n transfer_output_files = ""\n', worker_init="\n".join(env_extra + condor_extra), walltime="00:50:00", @@ -576,7 +591,7 @@ def main(): executor = processor.futures_executor, executor_args = {'schema': NanoGENSchema, "workers":10} ) - + diff --git a/mc_vjets_samples.info b/mc_vjets_samples.info index 514251d..24c639d 100644 --- a/mc_vjets_samples.info +++ b/mc_vjets_samples.info @@ -4,9 +4,11 @@ name=DYJets_inc_MiNNLO_Mu dir=/DYJetsToMuMu_M-50_massWgtFix_TuneCP5_13TeV-powheg #name=DYJets_inc_MiNNLO_El dir=/DYJetsToEE_M-50_massWgtFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos xsec=1976 kfac=1.0 #name=DYJets_HERWIG dir=/Local xsec=6430 kfac=1.0 # File produced by Kostas #name=DYJets_HERWIG dir=/DYJetsToLL_M-50_TuneCH3_13TeV-madgraphMLM-herwig7 xsec=3200 kfac=1.0 + #name=DYJets_MiNNLO_Mu_Supp dir=/ZJ_bornsuppfact_MiNNLO_TuneCP5_Test2_RunIISummer20UL xsec=1901 kfac=2.0 #name=DYJets_MiNNLO_Mu_Supp dir=/DYJetsToMuMu_BornSuppress_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos xsec=1901 kfac=2.0 -name=DYJets_MiNNLO_Mu_Supp dir=/DYJetsToMuMu_BornSuppressV2_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos xsec=1901 kfac=2.0 +#name=DYJets_MiNNLO_Mu_Supp dir=/DYJetsToMuMu_BornSuppressV2_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos xsec=1901 kfac=2.0 +name=DYJets_MiNNLO_Mu_Supp dir=/DYJetsToMuMu_BornSuppressV3_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos xsec=1901 kfac=2.10 #name=DYJets_0J dir=/DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8 xsec=5127 kfac=1.0 #name=DYJets_1J dir=/DYJetsToLL_1J_TuneCP5_13TeV-amcatnloFXFX-pythia8 xsec=958.2 kfac=1.0 diff --git a/mc_vjets_samples.list b/mc_vjets_samples.list index 16e9fbc..9edb2bd 100644 --- a/mc_vjets_samples.list +++ b/mc_vjets_samples.list @@ -1,7 +1,8 @@ -/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL17NanoAODv2-106X_mc2017_realistic_v8-v1/NANOAODSIM -/DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL17NanoAODv2-106X_mc2017_realistic_v8-v1/NANOAODSIM -/DYJetsToMuMu_M-50_massWgtFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL17NanoAODv2-106X_mc2017_realistic_v8-v1/NANOAODSIM -/DYJetsToEE_M-50_massWgtFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL17NanoAODv2-106X_mc2017_realistic_v8-v1/NANOAODSIM +/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM +#/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9_ext1-v1/NANOAODSIM +/DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v2/NANOAODSIM +/DYJetsToMuMu_M-50_massWgtFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v2/NANOAODSIM +/DYJetsToEE_M-50_massWgtFix_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v2/NANOAODSIM /DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM /DYJetsToLL_1J_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM /DYJetsToLL_2J_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM @@ -37,3 +38,4 @@ /ZJ_bornsuppfact_MiNNLO_TuneCP5_Test2_RunIISummer20UL/andrey-MiNNLO-13Jun2022-00000000000000000000000000000000/USER /DYJetsToMuMu_BornSuppress_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/andrey-RunIISummer20UL-13Jun2022-00000000000000000000000000000000/USER /DYJetsToMuMu_BornSuppressV2_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/andrey-RunIISummer20UL-2022Jun20-00000000000000000000000000000000/USER +/DYJetsToMuMu_BornSuppressV3_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/andrey-RunIISummer20UL-2022Jun27-00000000000000000000000000000000/USER \ No newline at end of file