Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix HTCondor environment issue, external access to EOS & more #67

Merged
merged 11 commits into from
Apr 24, 2024
8 changes: 4 additions & 4 deletions PicoProducer/python/analysis/ModuleMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ def __init__(self, fname, **kwargs):
self.out.cutflow.addcut('pair', "pair" )
self.out.cutflow.addcut('weight', "no cut, weighted", 15 )
self.out.cutflow.addcut('weight_no0PU', "no cut, weighted, PU>0", 16 ) # use for normalization; bug in pre-UL 2017 caused small fraction of events with nPU<=0
## Important cutflow entries to make stitching with exclusive mutauh sample
# Important cutflow entries to make stitching with exclusive mutauh sample (DYJetsToTauTauToMuTauh)
self.out.cutflow.addcut('weight_mutaufilter', "no cut, mutaufilter", 17 )
self.out.cutflow.addcut('weight_mutaufilter_NUP0orp4', "no cut, weighted, mutau, 0 or >4 jets", 18 )
self.out.cutflow.addcut('weight_mutaufilter_NUP1', "no cut, weighted, mutau, 1 jet", 19 )
self.out.cutflow.addcut('weight_mutaufilter_NUP2', "no cut, weighted, mutau, 2 jets", 20 )
self.out.cutflow.addcut('weight_mutaufilter_NUP3', "no cut, weighted, mutau, 3 jets", 21 )
self.out.cutflow.addcut('weight_mutaufilter_NUP4', "no cut, weighted, mutau, 4 jets", 22 )


def beginJob(self):
"""Before processing any events or files."""
Expand Down Expand Up @@ -244,8 +244,8 @@ def analyze(self, event):
self.out.genvistaueta_2[0] = eta
self.out.genvistauphi_2[0] = phi
self.out.gendm_2[0] = status
if self.dozpt:
self.out.mutaufilter[0] = filtermutau(event) # for stitching DYJetsToTauTauToMuTauh
if self.domutau:
self.out.mutaufilter[0] = self.ismutau #filtermutau(event) # for stitching DYJetsToTauTauToMuTauh


# JETS
Expand Down
45 changes: 24 additions & 21 deletions PicoProducer/python/analysis/ModuleTauPair.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def __init__(self, fname, **kwargs):
self.tauwp = kwargs.get('tauwp', 1 ) # minimum DeepTau WP, e.g. 1 = VVVLoose, etc.
self.dotoppt = kwargs.get('toppt', 'TT' in fname ) # top pT reweighting
self.dozpt = kwargs.get('zpt', 'DY' in fname ) # Z pT reweighting
self.domutau = kwargs.get('domutau', 'DY' in fname or self.dozpt ) # mutau genfilter for stitching DY sample
self.dopdf = kwargs.get('dopdf', False ) and self.ismc # store PDF & scale weights
self.dorecoil = kwargs.get('recoil', False ) and self.ismc # recoil corrections #('DY' in name or re.search(r"W\d?Jets",name)) and self.year==2016) # and self.year==2016
self.dosys = self.tessys in [None,''] and self.ltf in [1,None] and self.jtf in [1,None] # include systematic variations of weight
Expand All @@ -66,6 +67,7 @@ def __init__(self, fname, **kwargs):
self.filter = getmetfilters(self.era,self.isdata,verb=self.verbosity)

# CORRECTIONS
self.ismutau = False # event passes gen mutau filter (to avoid computing twice)
self.ptnom = lambda j: j.pt # use 'pt' as nominal jet pt (not corrected)
self.jecUncLabels = [ ]
self.metUncLabels = [ ]
Expand Down Expand Up @@ -111,6 +113,7 @@ def beginJob(self):
print(">>> %-12s = %s"%('dotoppt', self.dotoppt))
print(">>> %-12s = %s"%('dopdf', self.dopdf))
print(">>> %-12s = %s"%('dozpt', self.dozpt))
print(">>> %-12s = %s"%('domutau', self.domutau))
#print ">>> %-12s = %s"%('dorecoil', self.dorecoil)
print(">>> %-12s = %s"%('dojec', self.dojec))
print(">>> %-12s = %s"%('dojecsys', self.dojecsys))
Expand Down Expand Up @@ -150,9 +153,8 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('Tau_idDeepTau2018v2p5VSjet','Tau_idDeepTau2017v2p1VSjet'),
('Tau_rawDeepTau2018v2p5VSe','Tau_rawDeepTau2017v2p1VSe'),
('Tau_rawDeepTau2018v2p5VSmu','Tau_rawDeepTau2017v2p1VSmu'),
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')

]
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')
]
# for v9
branches = [
('Electron_mvaFall17V2Iso', 'Electron_mvaFall17Iso' ),
Expand All @@ -173,8 +175,7 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('Tau_idDeepTau2018v2p5VSjet','Tau_idDeepTau2017v2p1VSjet'),
('Tau_rawDeepTau2018v2p5VSe','Tau_rawDeepTau2017v2p1VSe'),
('Tau_rawDeepTau2018v2p5VSmu','Tau_rawDeepTau2017v2p1VSmu'),
('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')

('Tau_rawDeepTau2018v2p5VSjet','Tau_rawDeepTau2017v2p1VSjet')
]
if self.year==2016:
branches += [
Expand Down Expand Up @@ -219,22 +220,24 @@ def fillhists(self,event):
self.out.cutflow.fill('weight_no0PU',event.genWeight)
else: # bug in pre-UL 2017 caused small fraction of events with nPU<=0
return False
# Specific selections to compute mutau filter efficiencies for stitching of different DY samples
isMuTau = filtermutau(event)
self.out.cutflow.fill('weight_mutaufilter',event.genWeight*isMuTau)
try:
if event.LHE_Njets==0 or event.LHE_Njets>4:
self.out.cutflow.fill('weight_mutaufilter_NUP0orp4',event.genWeight*isMuTau)
elif event.LHE_Njets==1:
self.out.cutflow.fill('weight_mutaufilter_NUP1',event.genWeight*isMuTau)
elif event.LHE_Njets==2:
self.out.cutflow.fill('weight_mutaufilter_NUP2',event.genWeight*isMuTau)
elif event.LHE_Njets==3:
self.out.cutflow.fill('weight_mutaufilter_NUP3',event.genWeight*isMuTau)
elif event.LHE_Njets==4:
self.out.cutflow.fill('weight_mutaufilter_NUP4',event.genWeight*isMuTau)
except RuntimeError:
no_LHE_Njets_var = True
# Specific selections to compute mutau filter efficiencies for stitching of different DY samples (DYJetsToTauTauToMuTauh)
if self.domutau:
self.ismutau = filtermutau(event) # event passes gen mutau filter
self.out.cutflow.fill('weight_mutaufilter',event.genWeight*isMuTau)
try:
if event.LHE_Njets==0 or event.LHE_Njets>4:
self.out.cutflow.fill('weight_mutaufilter_NUP0orp4',event.genWeight*isMuTau)
elif event.LHE_Njets==1:
self.out.cutflow.fill('weight_mutaufilter_NUP1',event.genWeight*isMuTau)
elif event.LHE_Njets==2:
self.out.cutflow.fill('weight_mutaufilter_NUP2',event.genWeight*isMuTau)
elif event.LHE_Njets==3:
self.out.cutflow.fill('weight_mutaufilter_NUP3',event.genWeight*isMuTau)
elif event.LHE_Njets==4:
self.out.cutflow.fill('weight_mutaufilter_NUP4',event.genWeight*isMuTau)
except RuntimeError:
print(">>> WARNING: RuntimeError! Setting domutau=False !")
self.domutau = False
self.out.pileup.Fill(event.Pileup_nTrueInt)

return True
Expand Down
2 changes: 1 addition & 1 deletion PicoProducer/python/analysis/TreeProducerMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ def __init__(self, filename, module, **kwargs):
self.addBranch('idweightDown_dm_2', 'f', 1.)
self.addBranch('ltfweightUp_2', 'f', 1.)
self.addBranch('ltfweightDown_2', 'f', 1.)
if self.module.dozpt:
if self.module.domutau:
self.addBranch('mutaufilter', '?', title="has tautau -> mutau, pT>18, |eta|<2.5")

90 changes: 65 additions & 25 deletions PicoProducer/python/batch/submit_HTCondor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,79 @@
## Script to run on a HTCondor batch system

# START
START=`date +%s`
echo "Job start at `date`"
echo "Running job on machine `uname -a`, host $HOSTNAME"
function peval { echo ">>> $@"; eval "$@"; }
START="$(date +%s)"
echo "Job starts at $(date)"
echo "Running job on machine $(uname -a), host $HOSTNAME"

# SETTING
TASKCMD="$@"
WORKDIR="$PWD"
printf '=%.0s' `seq 60`; echo
echo "\$PWD=$PWD"
echo "\$JOBID=$JOBID"
echo "\$TASKID=$TASKID"
echo "\$HOSTNAME=$HOSTNAME"
echo "\$TASKCMD=$TASKCMD"
echo "\$WORKDIR=$WORKDIR"
#printf '=%.0s' `seq 60`; echo
#env
#printf '=%.0s' `seq 60`; echo
# SETTINGS: store in file for reuse
cat << EOF > setenv.sh
function peval { echo ">>> \$@"; eval "\$@"; }
function pbar { printf '=%.0s' \$(seq \${1:-70}); echo; }
VERB=${VERB:-0} # verbosity level for debugging
START=$START
JOBID=$JOBID
TASKID=$TASKID
HOSTNAME=$HOSTNAME
PWD=$PWD
WORKDIR=$PWD
CONTAINER='${CONTAINER:-${APPTAINER_CONTAINER:-${SINGULARITY_CONTAINER:-}}}' # to set OS environment with container (Singularity, e.g. "cmssw-el7")
CMSSW_BASE=$CMSSW_BASE # to set CMSSW environment
TASKCMD='$@'
EOF
source setenv.sh # set environment
pbar
peval 'tail -n +3 setenv.sh | while read line; do echo "\$$line"; done'
pbar
[ $VERB -ge 1 ] && { peval "env"; pbar; } # print out environment for debugging

# OS ENVIRONMENT with container (Singularity)
# https://cms-sw.github.io/singularity.html
# https://apptainer.org/docs/user/main/environment_and_metadata.html
if [ ! -z "$CONTAINER" ]; then # if $CONTAINER is set
echo ">>> Setting OS environment with container '$CONTAINER'..."
if [[ "$CONTAINER" = *"/"* ]]; then # container/singularity image, e.g. "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmssw/el7:x86_64"
peval "cmssw-env --cmsos \$(basename $CONTAINER)"
else # container/singularity command, e.g. just "cmssw-el7"
peval "$CONTAINER" # setup container
fi
peval "source setenv.sh" # set environment again (incl. functions) after Singularity
[ $VERB -ge 1 ] && { peval "env"; pbar; } # print out environment for debugging
fi

# ENVIRONMENT
if [ ! -z "$CMSSW_BASE" -a -d "$CMSSW_BASE/src" ]; then
peval "cd $CMSSW_BASE/src"
peval 'eval `scramv1 runtime -sh`'
peval "cd $WORKDIR"
# CMSSW ENVIRONMENT
if [ -z "$CMSSW_BASE" ]; then # $CMSSW_BASE is not set
echo ">>> WARNING! CMSSW_BASE was not defined!"
# Guess CMSSW_BASE path from $CMSSW_BASE/src/TauFW/PicoProducer/python/batch/submit_HTCondor.sh
SCRIPT="$(echo $TASKCMD | awk '{ print $2 }')" # assume `[COMMAND] [SCRIPT] [OPTIONS]`
CMSSW_BASE=$(realpath $(dirname "${SCRIPT}")/../../../../..)
echo ">>> Guessing CMSSW_BASE=$CMSSW_BASE based on SCRIPT=$SCRIPT"
fi
if [ ! -z "$CMSSW_BASE" ]; then # $CMSSW_BASE is set
echo ">>> Setting CMSSW environment from CMSSW_BASE=$CMSSW_BASE..."
if [ -d "/cvmfs/cms.cern.ch/" ]; then # /cvmfs exists/mounted
peval "source /cvmfs/cms.cern.ch/cmsset_default.sh"
else # could not find /cvmfs
echo ">>> WARNING! /cvmfs/cms.cern.ch/ does not exist or not mounted on machine $(uname -a), host $HOSTNAME !"
fi
if [ -d "$CMSSW_BASE/src" ]; then # $CMSSW_BASE exists/mounted
peval "cd $CMSSW_BASE/src"
peval 'eval $(scramv1 runtime -sh)' # = cmsenv
peval "cd $WORKDIR"
else # could not find CMSSW
echo ">>> WARNING! $CMSSW_BASE/src does not exist or not mounted on machine $(uname -a), host $HOSTNAME !"
fi
fi

# MAIN FUNCTIONALITY
#TASKCMD=$(cat $JOBLIST | sed "${TASKID}q;d")
pbar
#TASKCMD=$(cat $JOBLIST | sed "${TASKID}q;d") # get TASKCMD from job list file
echo "\$PWD=$PWD"
peval "$TASKCMD"

# FINISH
echo
END=`date +%s`; RUNTIME=$((END-START))
echo "Job complete at `date`"
pbar
peval "rm $WORKDIR/setenv.sh"
END="$(date +%s)"; RUNTIME=$((END-START))
echo "Job complete at $(date)"
printf "Took %d minutes %d seconds" "$(( $RUNTIME / 60 ))" "$(( $RUNTIME % 60 ))"
18 changes: 14 additions & 4 deletions PicoProducer/python/batch/submit_HTCondor.sub
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,20 @@ output = $(mylogfile)
error = $(mylogfile)
should_transfer_files = no
use_x509userproxy = true
getenv = true
environment = JOBID=$(ClusterId);TASKID=$(ProcId)
# NOTE:
# The following 'container' line is used to explicitly set the OS environment with a container/singularity,
# by adding the `-append container=cmssw-cc7` option to condor_submit, or defining `export CONTAINER=cmssw-cc7`
# in the shell's environment. Alternatively, if APPTAINER_CONTAINER or SINGULARITY_CONTAINER is defined and
# exported in your environment (e.g. inside a container), the shell script uses this with `cmssw-env`.
container = $ENV(CONTAINER:$ENV(APPTAINER_CONTAINER:$ENV(SINGULARITY_CONTAINER:)))
# NOTE: `getenv = true` may put a large load on HTCondor infrastructure for the large CMSSW
# environment and many jobs, so it's better to set the environment during the job itself
getenv = CMSSW*, SCRAM_ARCH*, *CONTAINER
# NOTE: Make sure to use quotation marks (") to avoid issues with the following line on lxplus:
environment = "JOBID=$(ClusterId) TASKID=$(ProcId) CONTAINER=$(container)"
# NOTE: The following OS requirement should be replaced for CMSSW 13 & later,
# or replaced with Singularity containers altogether when CentOS7 is phased out on lxplus
requirements = (OpSysAndVer =?= "CentOS7")
+JobFlavour = workday
+MaxRuntime = 20000
#+AccountingGroup = "group_u_BE.ABP.SLAP"
#queue arg from args.txt
#queue arg from args.txt
20 changes: 16 additions & 4 deletions PicoProducer/python/batch/submit_HTCondor_CMG.sub
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Submit as
# condor_submit submit_HTCondor.sub 'mylogfile=log/myjob.$(ClusterId).$(ProcId).log' -queue arg from args.txt
# condor_submit submit_HTCondor_CMG.sub 'mylogfile=log/myjob.$(ClusterId).$(ProcId).log' -queue arg from args.txt
universe = vanilla
executable = python/batch/submit_HTCondor.sh
arguments = $(arg)
Expand All @@ -10,9 +10,21 @@ output = $(mylogfile)
error = $(mylogfile)
should_transfer_files = no
use_x509userproxy = true
getenv = true
environment = JOBID=$(ClusterId);TASKID=$(ProcId)
# NOTE:
# The following 'container' line is used to explicitly set the OS environment with a container/singularity,
# by adding the `-append container=cmssw-cc7` option to condor_submit, or defining `export CONTAINER=cmssw-cc7`
# in the shell's environment. Alternatively, if APPTAINER_CONTAINER or SINGULARITY_CONTAINER is defined and
# exported in your environment (e.g. inside a container), the shell script uses this with `cmssw-env`.
container = $ENV(CONTAINER:$ENV(APPTAINER_CONTAINER:$ENV(SINGULARITY_CONTAINER:)))
# NOTE: `getenv = true` may put a large load on HTCondor infrastructure for the large CMSSW
# environment and many jobs, so it's better to set the environment during the job itself
getenv = CMSSW*, SCRAM_ARCH*, *CONTAINER
# NOTE: Make sure to use quotation marks (") to avoid issues with the following line on lxplus:
environment = "JOBID=$(ClusterId) TASKID=$(ProcId) CONTAINER=$(container)"
# NOTE: The following OS requirement should be replaced for CMSSW 13 & later,
# or replaced with Singularity containers altogether when CentOS7 is phased out on lxplus
requirements = (OpSysAndVer =?= "CentOS7")
+JobFlavour = workday
+MaxRuntime = 20000
+AccountingGroup = "group_u_CMST3.all"
#queue arg from args.txt
#queue arg from args.txt
36 changes: 13 additions & 23 deletions PicoProducer/python/storage/EOS.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,35 @@
# https://cern.service-now.com/service-portal?id=kb_article&sys_id=fae8543fc9ed05006d218776d679b74a
import os
from TauFW.common.tools.utils import execute
from TauFW.PicoProducer.storage.utils import host
from TauFW.PicoProducer.storage.StorageSystem import StorageSystem
import getpass, platform


class EOS(StorageSystem):

def __init__(self,path,verb=0,ensure=False,eos=False,**kwargs):
"""EOS is mounted on lxplus, so no special override are necessary."""
"""EOS is mounted on lxplus, so no special overload are necessary."""
super(EOS,self).__init__(path,verb=verb,ensure=False,**kwargs)
if not self.mounted: # EOS is mounted on lxplus
self.fileurl = "root://eosuser.cern.ch/" #"root://eoscms/"
if eos: # use EOS command
# https://cern.service-now.com/service-portal?id=kb_article&n=KB0001998
os.environ["EOS_MGM_URL"] = "root://eosuser.cern.ch"
self.lscmd = "eos ls" # first do export EOS_MGM_URL=root://eosuser.cern.ch
self.lscmd = "eos rm" # first do export EOS_MGM_URL=root://eosuser.cern.ch
else: # use uberftp; NOTE: doest not work for /eos/user/...
#self.lscmd = "uberftp -ls" # stopped working 12/2023
#self.lsurl = "gsiftp://eoscmsftp.cern.ch/"
self.lscmd = "LD_LIBRARY_PATH='' PYTHONPATH='' gfal-ls -l"
self.lsurl = "root://eoscms.cern.ch/"
else: # NOTE: uberftp no longer supported for EOS...
unset = "" if 'ucl' in host else "LD_LIBRARY_PATH='' PYTHONPATH='' " # unset libraries that break gFal tools
self.lscmd = unset+"gfal-ls -l"
self.lsurl = self.fileurl #"root://eoscms.cern.ch/"
self.lscol = -1 # take last column
self.rmcmd = 'uberftp -rm'
self.rmurl = 'gsiftp://eoscmsftp.cern.ch/'
self.mkdir = self._mkdir # override default StorageSystem.mkdir
self.mkdrcmd = unset+"gfal-mkdir -p"
self.mkdrurl = self.fileurl
self.rmcmd = unset+"gfal-rm -r"
self.rmurl = self.fileurl
self.cpcmd = 'xrdcp -f'
self.cpurl = self.fileurl
self.chmdprm = '2777'
self.cpurl = "root://eoscms.cern.ch/"
self.fileurl = "root://eosuser.cern.ch/" #"root://eoscms/"
#self.prefix = "root://eoscms.cern.ch/"
self.tmpdir = '/tmp/$USER/'
if ensure:
self.ensuredir(self.path)

#def _rm(self,*paths,**kwargs):
# path = self.expandpath(*paths,here=True)
# verb = kwargs.get('verb',self.verbosity)
# return self.execute("uberftp storage01.lcg.cscs.ch 'rm -r %s'"%(path),verb=verb)

def _mkdir(self,dirname='$PATH',**kwargs):
verb = kwargs.get('verb',self.verbosity)
dirname = self.expandpath(dirname,here=True)
return self.execute("uberftp eoscmsftp.cern.ch 'mkdir %s'"%(dirname),verb=verb)

4 changes: 2 additions & 2 deletions PicoProducer/python/storage/StorageSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self,path,verb=0,**kwargs):
self.chmdcmd = 'chmod'
self.chmdurl = ''
self.haddcmd = kwargs.get('haddcmd',None) or 'hadd -ff' # same compression level as first in put
self.tmpdir = '/tmp/$USER/' # $TMPDIR # mounted temporary directory
self.tmpdir = os.environ.get('TMPDIR',os.path.join('/tmp/',os.environ.get('USER','TauFW'))) # mounted temporary directory
self.fileurl = ""
self.verbosity = verb
if path.startswith('/'):
Expand Down Expand Up @@ -126,7 +126,7 @@ def ls(self,*paths,**kwargs):
retlist = [x for x in retlist.split(delim) if x]
if isinstance(lscol,int):
retlist = [l.split(' ')[lscol] for l in retlist]
if retlist and 'No such file or directory' in retlist[0]:
if retlist and any(e in retlist[0] for e in ['No such file or directory','[ERROR]']):
LOG.warning(retlist[0])
retlist = [ ]
elif filters:
Expand Down
2 changes: 1 addition & 1 deletion PicoProducer/python/storage/T2_PSI.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self,path,verb=0,ensure=False,**kwargs):
#self.cpurl = "root://storage01.lcg.cscs.ch/"
self.cpcmd = "LD_LIBRARY_PATH='' PYTHONPATH='' gfal-copy --force"
self.cpurl = "gsiftp://storage01.lcg.cscs.ch/"
self.tmpdir = '/scratch/$USER/'
self.tmpdir = os.path('/scratch/',os.environ.get('USER','TauFW'))
self.fileurl = "root://storage01.lcg.cscs.ch/"
if ensure:
self.ensuredir(self.path)
Expand Down
3 changes: 2 additions & 1 deletion PicoProducer/python/storage/T3_PSI.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#! /usr/bin/env python
# Author: Izaak Neutelings (May 2020)
import os
from TauFW.PicoProducer.storage.StorageSystem import StorageSystem


Expand All @@ -18,7 +19,7 @@ def __init__(self,path,verb=0,ensure=False,**kwargs):
self.cpcmd = 'xrdcp -f'
self.cpurl = "root://t3dcachedb03.psi.ch/"
self.fileurl = "root://t3dcachedb03.psi.ch/"
self.tmpdir = '/scratch/$USER/'
self.tmpdir = os.path('/scratch/',os.environ.get('USER','TauFW'))
if ensure:
self.ensuredir(self.path)

Expand Down
Loading