Skip to content

Commit

Permalink
Merge pull request #67 from cms-tau-pog/hackathon
Browse files Browse the repository at this point in the history
Fix HTCondor environment issue, external access to EOS & more
  • Loading branch information
IzaakWN authored Apr 24, 2024
2 parents 74862a6 + 8a3b984 commit 60facf1
Show file tree
Hide file tree
Showing 12 changed files with 219 additions and 138 deletions.
8 changes: 4 additions & 4 deletions PicoProducer/python/analysis/ModuleMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ def __init__(self, fname, **kwargs):
self.out.cutflow.addcut('pair', "pair" )
self.out.cutflow.addcut('weight', "no cut, weighted", 15 )
self.out.cutflow.addcut('weight_no0PU', "no cut, weighted, PU>0", 16 ) # use for normalization; bug in pre-UL 2017 caused small fraction of events with nPU<=0
## Important cutflow entries to make stitching with exclusive mutauh sample
# Important cutflow entries to make stitching with exclusive mutauh sample (DYJetsToTauTauToMuTauh)
self.out.cutflow.addcut('weight_mutaufilter', "no cut, mutaufilter", 17 )
self.out.cutflow.addcut('weight_mutaufilter_NUP0orp4', "no cut, weighted, mutau, 0 or >4 jets", 18 )
self.out.cutflow.addcut('weight_mutaufilter_NUP1', "no cut, weighted, mutau, 1 jet", 19 )
self.out.cutflow.addcut('weight_mutaufilter_NUP2', "no cut, weighted, mutau, 2 jets", 20 )
self.out.cutflow.addcut('weight_mutaufilter_NUP3', "no cut, weighted, mutau, 3 jets", 21 )
self.out.cutflow.addcut('weight_mutaufilter_NUP4', "no cut, weighted, mutau, 4 jets", 22 )


def beginJob(self):
"""Before processing any events or files."""
Expand Down Expand Up @@ -244,8 +244,8 @@ def analyze(self, event):
self.out.genvistaueta_2[0] = eta
self.out.genvistauphi_2[0] = phi
self.out.gendm_2[0] = status
if self.dozpt:
self.out.mutaufilter[0] = filtermutau(event) # for stitching DYJetsToTauTauToMuTauh
if self.domutau:
self.out.mutaufilter[0] = self.ismutau #filtermutau(event) # for stitching DYJetsToTauTauToMuTauh


# JETS
Expand Down
45 changes: 24 additions & 21 deletions PicoProducer/python/analysis/ModuleTauPair.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(self, fname, **kwargs):
self.tauwp = kwargs.get('tauwp', 1 ) # minimum DeepTau WP, e.g. 1 = VVVLoose, etc.
self.dotoppt = kwargs.get('toppt', 'TT' in fname ) # top pT reweighting
self.dozpt = kwargs.get('zpt', 'DY' in fname ) # Z pT reweighting
self.domutau = kwargs.get('domutau', 'DY' in fname or self.dozpt ) # mutau genfilter for stitching DY sample
self.dopdf = kwargs.get('dopdf', False ) and self.ismc # store PDF & scale weights
self.dorecoil = kwargs.get('recoil', False ) and self.ismc # recoil corrections #('DY' in name or re.search(r"W\d?Jets",name)) and self.year==2016) # and self.year==2016
self.dosys = self.tessys in [None,''] and self.ltf in [1,None] and self.jtf in [1,None] # include systematic variations of weight
Expand All @@ -67,6 +68,7 @@ def __init__(self, fname, **kwargs):
self.filter = getmetfilters(self.era,self.isdata,verb=self.verbosity)

# CORRECTIONS
self.ismutau = False # event passes gen mutau filter (to avoid computing twice)
self.ptnom = lambda j: j.pt # use 'pt' as nominal jet pt (not corrected)
self.jecUncLabels = [ ]
self.metUncLabels = [ ]
Expand Down Expand Up @@ -112,6 +114,7 @@ def beginJob(self):
print(">>> %-12s = %s"%('dotoppt', self.dotoppt))
print(">>> %-12s = %s"%('dopdf', self.dopdf))
print(">>> %-12s = %s"%('dozpt', self.dozpt))
print(">>> %-12s = %s"%('domutau', self.domutau))
#print ">>> %-12s = %s"%('dorecoil', self.dorecoil)
print(">>> %-12s = %s"%('dojec', self.dojec))
print(">>> %-12s = %s"%('dojecsys', self.dojecsys))
Expand Down Expand Up @@ -151,9 +154,8 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('Tau_idDeepTau2018v2p5VSjet','Tau_idDeepTau2017v2p1VSjet'),
('Tau_rawDeepTau2018v2p5VSe','Tau_rawDeepTau2017v2p1VSe'),
('Tau_rawDeepTau2018v2p5VSmu','Tau_rawDeepTau2017v2p1VSmu'),
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')

]
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')
]
# for v9
branches = [
('Electron_mvaFall17V2Iso', 'Electron_mvaFall17Iso' ),
Expand All @@ -174,8 +176,7 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('Tau_idDeepTau2018v2p5VSjet','Tau_idDeepTau2017v2p1VSjet'),
('Tau_rawDeepTau2018v2p5VSe','Tau_rawDeepTau2017v2p1VSe'),
('Tau_rawDeepTau2018v2p5VSmu','Tau_rawDeepTau2017v2p1VSmu'),
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')

('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')
]
if self.year==2016:
branches += [
Expand Down Expand Up @@ -236,22 +237,24 @@ def fillhists(self,event):
self.out.cutflow.fill('weight_no0PU',event.genWeight)
else: # bug in pre-UL 2017 caused small fraction of events with nPU<=0
return False
# Specific selections to compute mutau filter efficiencies for stitching of different DY samples
isMuTau = filtermutau(event)
self.out.cutflow.fill('weight_mutaufilter',event.genWeight*isMuTau)
try:
if event.LHE_Njets==0 or event.LHE_Njets>4:
self.out.cutflow.fill('weight_mutaufilter_NUP0orp4',event.genWeight*isMuTau)
elif event.LHE_Njets==1:
self.out.cutflow.fill('weight_mutaufilter_NUP1',event.genWeight*isMuTau)
elif event.LHE_Njets==2:
self.out.cutflow.fill('weight_mutaufilter_NUP2',event.genWeight*isMuTau)
elif event.LHE_Njets==3:
self.out.cutflow.fill('weight_mutaufilter_NUP3',event.genWeight*isMuTau)
elif event.LHE_Njets==4:
self.out.cutflow.fill('weight_mutaufilter_NUP4',event.genWeight*isMuTau)
except RuntimeError:
no_LHE_Njets_var = True
# Specific selections to compute mutau filter efficiencies for stitching of different DY samples (DYJetsToTauTauToMuTauh)
if self.domutau:
self.ismutau = filtermutau(event) # event passes gen mutau filter
self.out.cutflow.fill('weight_mutaufilter',event.genWeight*isMuTau)
try:
if event.LHE_Njets==0 or event.LHE_Njets>4:
self.out.cutflow.fill('weight_mutaufilter_NUP0orp4',event.genWeight*isMuTau)
elif event.LHE_Njets==1:
self.out.cutflow.fill('weight_mutaufilter_NUP1',event.genWeight*isMuTau)
elif event.LHE_Njets==2:
self.out.cutflow.fill('weight_mutaufilter_NUP2',event.genWeight*isMuTau)
elif event.LHE_Njets==3:
self.out.cutflow.fill('weight_mutaufilter_NUP3',event.genWeight*isMuTau)
elif event.LHE_Njets==4:
self.out.cutflow.fill('weight_mutaufilter_NUP4',event.genWeight*isMuTau)
except RuntimeError:
print(">>> WARNING: RuntimeError! Setting domutau=False !")
self.domutau = False
self.out.pileup.Fill(event.Pileup_nTrueInt)

return True
Expand Down
2 changes: 1 addition & 1 deletion PicoProducer/python/analysis/TreeProducerMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ def __init__(self, filename, module, **kwargs):
self.addBranch('idweightDown_dm_2', 'f', 1.)
self.addBranch('ltfweightUp_2', 'f', 1.)
self.addBranch('ltfweightDown_2', 'f', 1.)
if self.module.dozpt:
if self.module.domutau:
self.addBranch('mutaufilter', '?', title="has tautau -> mutau, pT>18, |eta|<2.5")

90 changes: 65 additions & 25 deletions PicoProducer/python/batch/submit_HTCondor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,79 @@
## Script to run on a HTCondor batch system

# START
START=`date +%s`
echo "Job start at `date`"
echo "Running job on machine `uname -a`, host $HOSTNAME"
function peval { echo ">>> $@"; eval "$@"; }
START="$(date +%s)"
echo "Job starts at $(date)"
echo "Running job on machine $(uname -a), host $HOSTNAME"

# SETTING
TASKCMD="$@"
WORKDIR="$PWD"
printf '=%.0s' `seq 60`; echo
echo "\$PWD=$PWD"
echo "\$JOBID=$JOBID"
echo "\$TASKID=$TASKID"
echo "\$HOSTNAME=$HOSTNAME"
echo "\$TASKCMD=$TASKCMD"
echo "\$WORKDIR=$WORKDIR"
#printf '=%.0s' `seq 60`; echo
#env
#printf '=%.0s' `seq 60`; echo
# SETTINGS: store in file for reuse
cat << EOF > setenv.sh
function peval { echo ">>> \$@"; eval "\$@"; }
function pbar { printf '=%.0s' \$(seq \${1:-70}); echo; }
VERB=${VERB:-0} # verbosity level for debugging
START=$START
JOBID=$JOBID
TASKID=$TASKID
HOSTNAME=$HOSTNAME
PWD=$PWD
WORKDIR=$PWD
CONTAINER='${CONTAINER:-${APPTAINER_CONTAINER:-${SINGULARITY_CONTAINER:-}}}' # to set OS environment with container (Singularity, e.g. "cmssw-el7")
CMSSW_BASE=$CMSSW_BASE # to set CMSSW environment
TASKCMD='$@'
EOF
source setenv.sh # set environment
pbar
peval 'tail -n +3 setenv.sh | while read line; do echo "\$$line"; done'
pbar
[ $VERB -ge 1 ] && { peval "env"; pbar; } # print out environment for debugging

# OS ENVIRONMENT with container (Singularity)
# https://cms-sw.github.io/singularity.html
# https://apptainer.org/docs/user/main/environment_and_metadata.html
if [ ! -z "$CONTAINER" ]; then # if $CONTAINER is set
echo ">>> Setting OS environment with container '$CONTAINER'..."
if [[ "$CONTAINER" = *"/"* ]]; then # container/singularity image, e.g. "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmssw/el7:x86_64"
peval "cmssw-env --cmsos \$(basename $CONTAINER)"
else # container/singularity command, e.g. just "cmssw-el7"
peval "$CONTAINER" # setup container
fi
peval "source setenv.sh" # set environment again (incl. functions) after Singularity
[ $VERB -ge 1 ] && { peval "env"; pbar; } # print out environment for debugging
fi

# ENVIRONMENT
if [ ! -z "$CMSSW_BASE" -a -d "$CMSSW_BASE/src" ]; then
peval "cd $CMSSW_BASE/src"
peval 'eval `scramv1 runtime -sh`'
peval "cd $WORKDIR"
# CMSSW ENVIRONMENT
if [ -z "$CMSSW_BASE" ]; then # $CMSSW_BASE is not set
echo ">>> WARNING! CMSSW_BASE was not defined!"
# Guess CMSSW_BASE path from $CMSSW_BASE/src/TauFW/PicoProducer/python/batch/submit_HTCondor.sh
SCRIPT="$(echo $TASKCMD | awk '{ print $2 }')" # assume `[COMMAND] [SCRIPT] [OPTIONS]`
CMSSW_BASE=$(realpath $(dirname "${SCRIPT}")/../../../../..)
echo ">>> Guessing CMSSW_BASE=$CMSSW_BASE based on SCRIPT=$SCRIPT"
fi
if [ ! -z "$CMSSW_BASE" ]; then # $CMSSW_BASE is set
echo ">>> Setting CMSSW environment from CMSSW_BASE=$CMSSW_BASE..."
if [ -d "/cvmfs/cms.cern.ch/" ]; then # /cvmfs exists/mounted
peval "source /cvmfs/cms.cern.ch/cmsset_default.sh"
else # could not find /cvmfs
echo ">>> WARNING! /cvmfs/cms.cern.ch/ does not exist or not mounted on machine $(uname -a), host $HOSTNAME !"
fi
if [ -d "$CMSSW_BASE/src" ]; then # $CMSSW_BASE exists/mounted
peval "cd $CMSSW_BASE/src"
peval 'eval $(scramv1 runtime -sh)' # = cmsenv
peval "cd $WORKDIR"
else # could not find CMSSW
echo ">>> WARNING! $CMSSW_BASE/src does not exist or not mounted on machine $(uname -a), host $HOSTNAME !"
fi
fi

# MAIN FUNCTIONALITY
#TASKCMD=$(cat $JOBLIST | sed "${TASKID}q;d")
pbar
#TASKCMD=$(cat $JOBLIST | sed "${TASKID}q;d") # get TASKCMD from job list file
echo "\$PWD=$PWD"
peval "$TASKCMD"

# FINISH
echo
END=`date +%s`; RUNTIME=$((END-START))
echo "Job complete at `date`"
pbar
peval "rm $WORKDIR/setenv.sh"
END="$(date +%s)"; RUNTIME=$((END-START))
echo "Job complete at $(date)"
printf "Took %d minutes %d seconds" "$(( $RUNTIME / 60 ))" "$(( $RUNTIME % 60 ))"
18 changes: 14 additions & 4 deletions PicoProducer/python/batch/submit_HTCondor.sub
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,20 @@ output = $(mylogfile)
error = $(mylogfile)
should_transfer_files = no
use_x509userproxy = true
getenv = true
environment = JOBID=$(ClusterId);TASKID=$(ProcId)
# NOTE:
# The following 'container' line is used to explicitly set the OS environment with a container/singularity,
# by adding the `-append container=cmssw-cc7` option to condor_submit, or defining `export CONTAINER=cmssw-cc7`
# in the shell's environment. Alternatively, if APPTAINER_CONTAINER or SINGULARITY_CONTAINER is defined and
# exported in your environment (e.g. inside a container), the shell script uses this with `cmssw-env`.
container = $ENV(CONTAINER:$ENV(APPTAINER_CONTAINER:$ENV(SINGULARITY_CONTAINER:)))
# NOTE: `getenv = true` may put a large load on HTCondor infrastructure for the large CMSSW
# environment and many jobs, so it's better to set the environment during the job itself
getenv = CMSSW*, SCRAM_ARCH*, *CONTAINER
# NOTE: Make sure to use quotation marks (") to avoid issues with the following line on lxplus:
environment = "JOBID=$(ClusterId) TASKID=$(ProcId) CONTAINER=$(container)"
# NOTE: The following OS requirement should be replaced for CMSSW 13 & later,
# or replaced with singularities altogether when CentOS7 is phased out on lxplus
requirements = (OpSysAndVer =?= "CentOS7")
+JobFlavour = workday
+MaxRuntime = 20000
#+AccountingGroup = "group_u_BE.ABP.SLAP"
#queue arg from args.txt
#queue arg from args.txt
20 changes: 16 additions & 4 deletions PicoProducer/python/batch/submit_HTCondor_CMG.sub
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Submit as
# condor_submit submit_HTCondor.sub 'mylogfile=log/myjob.$(ClusterId).$(ProcId).log' -queue arg from args.txt
# condor_submit submit_HTCondor_CMG.sub 'mylogfile=log/myjob.$(ClusterId).$(ProcId).log' -queue arg from args.txt
universe = vanilla
executable = python/batch/submit_HTCondor.sh
arguments = $(arg)
Expand All @@ -10,9 +10,21 @@ output = $(mylogfile)
error = $(mylogfile)
should_transfer_files = no
use_x509userproxy = true
getenv = true
environment = JOBID=$(ClusterId);TASKID=$(ProcId)
# NOTE:
# The following 'container' line is used to explicitly set the OS environment with a container/singularity,
# by adding the `-append container=cmssw-cc7` option to condor_submit, or defining `export CONTAINER=cmssw-cc7`
# in the shell's environment. Alternatively, if APPTAINER_CONTAINER or SINGULARITY_CONTAINER is defined and
# exported in your environment (e.g. inside a container), the shell script uses this with `cmssw-env`.
container = $ENV(CONTAINER:$ENV(APPTAINER_CONTAINER:$ENV(SINGULARITY_CONTAINER:)))
# NOTE: `getenv = true` may put a large load on HTCondor infrastructure for the large CMSSW
# environment and many jobs, so it's better to set the environment during the job itself
getenv = CMSSW*, SCRAM_ARCH*, *CONTAINER
# NOTE: Make sure to use quotation marks (") to avoid issues with the following line on lxplus:
environment = "JOBID=$(ClusterId) TASKID=$(ProcId) CONTAINER=$(container)"
# NOTE: The following OS requirement should be replaced for CMSSW 13 & later,
# or replaced with singularities altogether when CentOS7 is phased out on lxplus
requirements = (OpSysAndVer =?= "CentOS7")
+JobFlavour = workday
+MaxRuntime = 20000
+AccountingGroup = "group_u_CMST3.all"
#queue arg from args.txt
#queue arg from args.txt
36 changes: 13 additions & 23 deletions PicoProducer/python/storage/EOS.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,35 @@
# https://cern.service-now.com/service-portal?id=kb_article&sys_id=fae8543fc9ed05006d218776d679b74a
import os
from TauFW.common.tools.utils import execute
from TauFW.PicoProducer.storage.utils import host
from TauFW.PicoProducer.storage.StorageSystem import StorageSystem
import getpass, platform


class EOS(StorageSystem):

def __init__(self,path,verb=0,ensure=False,eos=False,**kwargs):
"""EOS is mounted on lxplus, so no special override are necessary."""
"""EOS is mounted on lxplus, so no special overrides are necessary."""
super(EOS,self).__init__(path,verb=verb,ensure=False,**kwargs)
if not self.mounted: # EOS is mounted on lxplus
self.fileurl = "root://eosuser.cern.ch/" #"root://eoscms/"
if eos: # use EOS command
# https://cern.service-now.com/service-portal?id=kb_article&n=KB0001998
os.environ["EOS_MGM_URL"] = "root://eosuser.cern.ch"
self.lscmd = "eos ls" # first do export EOS_MGM_URL=root://eosuser.cern.ch
self.lscmd = "eos rm" # first do export EOS_MGM_URL=root://eosuser.cern.ch
else: # use uberftp; NOTE: doest not work for /eos/user/...
#self.lscmd = "uberftp -ls" # stopped working 12/2023
#self.lsurl = "gsiftp://eoscmsftp.cern.ch/"
self.lscmd = "LD_LIBRARY_PATH='' PYTHONPATH='' gfal-ls -l"
self.lsurl = "root://eoscms.cern.ch/"
else: # NOTE: uberftp no longer supported for EOS...
unset = "" if 'ucl' in host else "LD_LIBRARY_PATH='' PYTHONPATH='' " # unset libraries that break gFal tools
self.lscmd = unset+"gfal-ls -l"
self.lsurl = self.fileurl #"root://eoscms.cern.ch/"
self.lscol = -1 # take last column
self.rmcmd = 'uberftp -rm'
self.rmurl = 'gsiftp://eoscmsftp.cern.ch/'
self.mkdir = self._mkdir # override default StorageSystem.mkdir
self.mkdrcmd = unset+"gfal-mkdir -p"
self.mkdrurl = self.fileurl
self.rmcmd = unset+"gfal-rm -r"
self.rmurl = self.fileurl
self.cpcmd = 'xrdcp -f'
self.cpurl = self.fileurl
self.chmdprm = '2777'
self.cpurl = "root://eoscms.cern.ch/"
self.fileurl = "root://eosuser.cern.ch/" #"root://eoscms/"
#self.prefix = "root://eoscms.cern.ch/"
self.tmpdir = '/tmp/$USER/'
if ensure:
self.ensuredir(self.path)

#def _rm(self,*paths,**kwargs):
# path = self.expandpath(*paths,here=True)
# verb = kwargs.get('verb',self.verbosity)
# return self.execute("uberftp storage01.lcg.cscs.ch 'rm -r %s'"%(path),verb=verb)

def _mkdir(self,dirname='$PATH',**kwargs):
verb = kwargs.get('verb',self.verbosity)
dirname = self.expandpath(dirname,here=True)
return self.execute("uberftp eoscmsftp.cern.ch 'mkdir %s'"%(dirname),verb=verb)

4 changes: 2 additions & 2 deletions PicoProducer/python/storage/StorageSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self,path,verb=0,**kwargs):
self.chmdcmd = 'chmod'
self.chmdurl = ''
self.haddcmd = kwargs.get('haddcmd',None) or 'hadd -ff' # same compression level as first in put
self.tmpdir = '/tmp/$USER/' # $TMPDIR # mounted temporary directory
self.tmpdir = os.environ.get('TMPDIR',os.path.join('/tmp/',os.environ.get('USER','TauFW'))) # mounted temporary directory
self.fileurl = ""
self.verbosity = verb
if path.startswith('/'):
Expand Down Expand Up @@ -126,7 +126,7 @@ def ls(self,*paths,**kwargs):
retlist = [x for x in retlist.split(delim) if x]
if isinstance(lscol,int):
retlist = [l.split(' ')[lscol] for l in retlist]
if retlist and 'No such file or directory' in retlist[0]:
if retlist and any(e in retlist[0] for e in ['No such file or directory','[ERROR]']):
LOG.warning(retlist[0])
retlist = [ ]
elif filters:
Expand Down
2 changes: 1 addition & 1 deletion PicoProducer/python/storage/T2_PSI.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self,path,verb=0,ensure=False,**kwargs):
#self.cpurl = "root://storage01.lcg.cscs.ch/"
self.cpcmd = "LD_LIBRARY_PATH='' PYTHONPATH='' gfal-copy --force"
self.cpurl = "gsiftp://storage01.lcg.cscs.ch/"
self.tmpdir = '/scratch/$USER/'
self.tmpdir = os.path('/scratch/',os.environ.get('USER','TauFW'))
self.fileurl = "root://storage01.lcg.cscs.ch/"
if ensure:
self.ensuredir(self.path)
Expand Down
3 changes: 2 additions & 1 deletion PicoProducer/python/storage/T3_PSI.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#! /usr/bin/env python
# Author: Izaak Neutelings (May 2020)
import os
from TauFW.PicoProducer.storage.StorageSystem import StorageSystem


Expand All @@ -18,7 +19,7 @@ def __init__(self,path,verb=0,ensure=False,**kwargs):
self.cpcmd = 'xrdcp -f'
self.cpurl = "root://t3dcachedb03.psi.ch/"
self.fileurl = "root://t3dcachedb03.psi.ch/"
self.tmpdir = '/scratch/$USER/'
self.tmpdir = os.path('/scratch/',os.environ.get('USER','TauFW'))
if ensure:
self.ensuredir(self.path)

Expand Down
Loading

0 comments on commit 60facf1

Please sign in to comment.