Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deterministic version of Shuffle&merge #148

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Analysis/interface/AnalysisTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ ENUM_NAMES(GenQcdMatch) = {
{ GenQcdMatch::Gluon, "gen_gluon" }
};

enum class TauType { e = 0, mu = 1, tau = 2, jet = 3, emb_e = 4, emb_mu = 5, emb_tau = 6, emb_jet = 7, data = 8 };
enum class TauType { e = 0, mu = 1, tau = 2, jet = 3, emb_e = 4, emb_mu = 5, emb_tau = 6, emb_jet = 7, data = 8,
displaced_e = 9, displaced_mu = 10, displaced_tau = 11, displaced_jet = 12 };
ENUM_NAMES(TauType) = {
{ TauType::e, "e" }, { TauType::mu, "mu" }, { TauType::tau, "tau" }, { TauType::jet, "jet" },
{ TauType::emb_e, "emb_e" }, { TauType::emb_mu, "emb_mu" }, { TauType::emb_tau, "emb_tau" }, { TauType::emb_jet, "emb_jet" },
{ TauType::data, "data" }
{ TauType::emb_e, "emb_e" }, { TauType::emb_mu, "emb_mu" },
{ TauType::emb_tau, "emb_tau" }, { TauType::emb_jet, "emb_jet" },
{ TauType::data, "data" },
{ TauType::displaced_e, "displaced_e" }, { TauType::displaced_mu, "displaced_mu" },
{ TauType::displaced_tau, "displaced_tau" }, { TauType::displaced_jet, "displaced_jet" }
};

enum class SampleType { Data = 0, MC = 1, Embedded = 2 };
Expand Down
238 changes: 238 additions & 0 deletions Preprocessing/HLT/AnalysisTools.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
#pragma once

#include <cmath>
#include <string>

using LorentzVectorXYZ = ROOT::Math::LorentzVector<ROOT::Math::PxPyPzE4D<double>>;
using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double>>;
using LorentzVectorE = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiE4D<double>>;
using RVecI = ROOT::VecOps::RVec<int>;
using RVecS = ROOT::VecOps::RVec<size_t>;
using RVecUC = ROOT::VecOps::RVec<UChar_t>;
using RVecF = ROOT::VecOps::RVec<float>;
using RVecB = ROOT::VecOps::RVec<bool>;
using RVecVecI = ROOT::VecOps::RVec<RVecI>;
using RVecLV = ROOT::VecOps::RVec<LorentzVectorM>;
using RVecSetInt = ROOT::VecOps::RVec<std::set<int>>;

template<typename T1, typename T2>
auto DeltaPhi(T1 phi1, T2 phi2) -> decltype(phi2 - phi1) { return ROOT::Math::VectorUtil::Phi_mpi_pi(phi2 - phi1); }
template<typename T1, typename T2>
auto DeltaEta(T1 eta1, T2 eta2) -> decltype(eta2 - eta1) { return eta2 - eta1; }
template<typename T1, typename T2, typename T3, typename T4>
auto DeltaR(T1 eta1, T2 phi1, T3 eta2, T4 phi2) -> decltype(eta1 + phi1 + eta2 + phi2)
{
const auto dphi = DeltaPhi(phi1, phi2);
const auto deta = DeltaEta(eta1, eta2);
return std::hypot(dphi, deta);
}

inline RVecS CreateIndexes(size_t vecSize)
{
RVecS i(vecSize);
std::iota(i.begin(), i.end(), 0);
return i;
}

template<typename V, typename Cmp=std::greater<typename V::value_type>>
RVecI ReorderObjects(const V& varToOrder, const RVecI& indices, size_t nMax=std::numeric_limits<size_t>::max(),
const Cmp& cmp=Cmp())
{
RVecI ordered_indices = indices;
std::sort(ordered_indices.begin(), ordered_indices.end(), [&](int a, int b) {
return cmp(varToOrder.at(a), varToOrder.at(b));
});
const size_t n = std::min(ordered_indices.size(), nMax);
ordered_indices.resize(n);
return ordered_indices;
}

template<typename T, int n_binary_places=std::numeric_limits<T>::digits>
std::string GetBinaryString(T x)
{
std::bitset<n_binary_places> bs(x);
std::ostringstream ss;
ss << bs;
return ss.str();
}

template<typename V1, typename V2, typename V3, typename V4>
RVecLV GetP4(const V1& pt, const V2& eta, const V3& phi, const V4& mass, const RVecS& indices)
{
const size_t N = pt.size();
assert(N == eta.size() && N == phi.size() && N == mass.size());
RVecLV p4;
p4.reserve(indices.size());
for(auto idx : indices) {
assert(idx < N);
p4.emplace_back(pt[idx], eta[idx], phi[idx], mass[idx]);
}
return p4;
}

template<typename V1, typename V2, typename V3, typename V4>
RVecLV GetP4(const V1& pt, const V2& eta, const V3& phi, const V4& mass)
{
return GetP4(pt, eta, phi, mass, CreateIndexes(pt.size()));
}


template<typename TIn>
struct InToOut {
using type = TIn;
};

template<>
struct InToOut<bool> {
using type = int;
};

template<>
struct InToOut<unsigned char> {
using type = int;
};

template<typename TIn, typename TOut = typename InToOut<TIn>::type>
ROOT::VecOps::RVec<TOut> GetVar(const RVecI& idx, const ROOT::VecOps::RVec<TIn>& var)
{
ROOT::VecOps::RVec<TOut> result(idx.size(), TOut(0));
for(size_t i = 0; i < idx.size(); ++i) {
if(idx[i] >= 0)
result[i] = var[idx[i]];
}
return result;
}

template<typename TIn, typename TOut = typename InToOut<TIn>::type>
ROOT::VecOps::RVec<ROOT::VecOps::RVec<TOut>> GetVarVec(const RVecSetInt& matched, const ROOT::VecOps::RVec<TIn>& var)
{
ROOT::VecOps::RVec<ROOT::VecOps::RVec<TOut>> result(matched.size());
for(size_t i = 0; i < matched.size(); ++i) {
for(auto idx : matched[i]) {
result[i].push_back(var[idx]);
}
}
return result;
}

inline RVecB RemoveOverlaps(const RVecLV& obj_p4, const RVecB& pre_sel, const std::vector<RVecLV>& other_objects,
size_t min_number_of_non_overlaps, double min_deltaR)
{
RVecB result(pre_sel);
const double min_deltaR2 = std::pow(min_deltaR, 2);

const auto hasMinNumberOfNonOverlaps = [&](const LorentzVectorM& p4) {
size_t cnt = 0;
for(const auto& other_obj_col : other_objects) {
for(const auto& other_obj_p4 : other_obj_col) {
const double dR2 = ROOT::Math::VectorUtil::DeltaR2(p4, other_obj_p4);
if(dR2 > min_deltaR2) {
++cnt;
if(cnt >= min_number_of_non_overlaps)
return true;
}
}
}
return false;
};

for(size_t obj_idx = 0; obj_idx < obj_p4.size(); ++obj_idx) {
result[obj_idx] = pre_sel[obj_idx] && hasMinNumberOfNonOverlaps(obj_p4.at(obj_idx));
}
return result;
}

inline std::pair<int, double> FindMatching(const LorentzVectorM& target_p4, const RVecLV& ref_p4, double deltaR_thr)
{
double deltaR_min = deltaR_thr;
int current_idx = -1;
for(int refIdx = 0; refIdx < ref_p4.size(); ++refIdx) {
const auto dR_targetRef = ROOT::Math::VectorUtil::DeltaR(target_p4, ref_p4.at(refIdx));
if(dR_targetRef < deltaR_min) {
deltaR_min = dR_targetRef;
current_idx = refIdx;
}
}
return std::make_pair(current_idx, deltaR_min);
}

inline RVecI FindBestMatching(const RVecLV& target_p4, const RVecLV& ref_p4, double deltaR_thr)
{
RVecI targetIndices(target_p4.size());
for(size_t targetIdx = 0; targetIdx < target_p4.size(); ++targetIdx) {
const auto match = FindMatching(target_p4[targetIdx], ref_p4, deltaR_thr);
targetIndices[targetIdx] = match.first;
}
return targetIndices;
}

inline RVecI FindUniqueMatching(const RVecLV& target_p4, const RVecLV& ref_p4, double deltaR_thr)
{
RVecI targetIndices(target_p4.size(), -1);
const auto default_matching = std::make_pair(-1, deltaR_thr);
std::vector<std::pair<int, double>> matchings(ref_p4.size(), default_matching);
auto refIndices = CreateIndexes(ref_p4.size());
std::sort(refIndices.begin(), refIndices.end(), [&](int a, int b) {
if(ref_p4[a].pt() != ref_p4[b].pt()) return ref_p4[a].pt() > ref_p4[b].pt();
if(ref_p4[a].mass() != ref_p4[b].mass()) return ref_p4[a].mass() > ref_p4[b].mass();
const auto a_eta = std::abs(ref_p4[a].eta());
const auto b_eta = std::abs(ref_p4[b].eta());
if(a_eta != b_eta) return a_eta < b_eta;
const auto a_dir = std::make_pair(ref_p4[a].eta(), ref_p4[a].phi());
const auto b_dir = std::make_pair(ref_p4[b].eta(), ref_p4[b].phi());
return a_dir < b_dir;
});
for(int refIdx : refIndices) {
matchings[refIdx] = FindMatching(ref_p4[refIdx], target_p4, deltaR_thr);
for(int prevIdx : refIndices) {
if(prevIdx == refIdx) break;
if(matchings[prevIdx].first == matchings[refIdx].first) {
matchings[refIdx] = default_matching;
break;
}
}
}
for(int refIdx = 0; refIdx < ref_p4.size(); ++refIdx) {
if(matchings[refIdx].first >= 0)
targetIndices[matchings[refIdx].first] = refIdx;
}
return targetIndices;
}

inline RVecSetInt FindMatchingSet(const RVecLV& target_p4, const RVecLV& ref_p4, double dR_thr,
const RVecB& pre_sel_target, const RVecB& pre_sel_ref)
{
assert(target_p4.size() == pre_sel_target.size());
assert(ref_p4.size() == pre_sel_ref.size());
RVecSetInt findMatching(pre_sel_target.size());
for(size_t ref_idx = 0; ref_idx < pre_sel_ref.size(); ++ref_idx) {
if(!pre_sel_ref[ref_idx]) continue;
for(size_t target_idx = 0; target_idx < pre_sel_target.size(); ++target_idx) {
if(!pre_sel_target[target_idx]) continue;
const auto dR = ROOT::Math::VectorUtil::DeltaR(target_p4[target_idx], ref_p4[ref_idx]);
if(dR < dR_thr)
findMatching[target_idx].insert(ref_idx);
}
}
return findMatching;
}

inline RVecSetInt FindMatchingSet(const RVecLV& target_p4, const RVecLV& ref_p4, double dR_thr)
{
return FindMatchingSet(target_p4, ref_p4, dR_thr, RVecB(target_p4.size(), true), RVecB(ref_p4.size(), true));
}

namespace v_ops{
template<typename LV>
RVecF pt(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.pt(); }); }
template<typename LV>
RVecF eta(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.eta(); }); }
template<typename LV>
RVecF phi(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.phi(); }); }
template<typename LV>
RVecF mass(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.mass(); }); }
template<typename LV>
RVecF rho(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.rho(); }); }
template<typename LV>
RVecF z(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.z(); }); }
}
109 changes: 109 additions & 0 deletions Preprocessing/HLT/AnalysisTools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import datetime
from enum import Enum
import ROOT
import os

class TauType(Enum):
other = -1
e = 0
mu = 1
tau = 2
jet = 3
emb_e = 4
emb_mu = 5
emb_tau = 6
emb_jet = 7
data = 8
displaced_e = 9
displaced_mu = 10
displaced_tau = 11
displaced_jet = 12

def ListToVector(list, type="string"):
vec = ROOT.std.vector(type)()
for item in list:
vec.push_back(item)
return vec

def GenerateEnumCppString(cls):
enum_string = f"enum class {cls.__name__} : int {{\n"
for item in cls:
enum_string += f" {item.name} = {item.value},\n"
enum_string += "};"
return enum_string

def PrepareRootEnv():
headers_dir = os.path.dirname(os.path.abspath(__file__))
enums = [ TauType ]
headers = [ 'AnalysisTools.h', 'GenLepton.h', 'GenTools.h', 'TupleMaker.h' ]
for cls in enums:
cls_str = GenerateEnumCppString(cls)
if not ROOT.gInterpreter.Declare(cls_str):
raise RuntimeError(f'Failed to declare {cls.__name__}')
for header in headers:
header_path = os.path.join(headers_dir, header)
if not ROOT.gInterpreter.Declare(f'#include "{header_path}"'):
raise RuntimeError(f'Failed to load {header_path}')

def timestamp_str():
t = datetime.datetime.now()
t_str = t.strftime('%Y-%m-%d %H:%M:%S')
return f'[{t_str}] '

def MakeFileList(input):
file_list = []
if isinstance(input, str):
input = input.split(',')
for item in input:
if not os.path.exists(item):
raise RuntimeError(f'File or directory {item} does not exist')
if os.path.isdir(item):
for root, dirs, files in os.walk(item):
for file in files:
if file.endswith('.root'):
file_list.append(os.path.join(root, file))
else:
file_list.append(item)
return sorted(file_list)

def ApplyCommonDefinitions(df, deltaR=0.4, isData=False):
df = df.Define("genLeptons", """reco_tau::gen_truth::GenLepton::fromNanoAOD(
GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass,
GenPart_genPartIdxMother, GenPart_pdgId, GenPart_statusFlags,
GenPart_vx, GenPart_vy, GenPart_vz, event)""") \
.Define('L1Tau_mass', 'RVecF(L1Tau_pt.size(), 0.)') \
.Define('L1Tau_p4', 'GetP4(L1Tau_pt, L1Tau_eta, L1Tau_phi, L1Tau_mass)') \
.Define('Jet_p4', 'GetP4(Jet_pt, Jet_eta, Jet_phi, Jet_mass)') \
.Define('Tau_p4', 'GetP4(Tau_pt, Tau_eta, Tau_phi, Tau_mass)') \
.Define('L1Tau_jetIdx', f'FindUniqueMatching(L1Tau_p4, Jet_p4, {deltaR})') \
.Define('L1Tau_tauIdx', f'FindUniqueMatching(L1Tau_p4, Tau_p4, {deltaR})')

if isData:
df = df.Define('L1Tau_Gen_type', 'RVecI(L1Tau_pt.size(), static_cast<int>(TauType::data))')
else:
df = df.Define('GenJet_p4', 'GetP4(GenJet_pt, GenJet_eta, GenJet_phi, GenJet_mass)') \
.Define('GenLepton_p4', 'v_ops::visibleP4(genLeptons)') \
.Define('L1Tau_genLepIndices', f'FindMatchingSet(L1Tau_p4, GenLepton_p4, {deltaR})') \
.Define('L1Tau_genLepUniqueIdx', f'FindUniqueMatching(L1Tau_p4, GenLepton_p4, {deltaR})') \
.Define('L1Tau_genJetUniqueIdx', f'FindUniqueMatching(L1Tau_p4, GenJet_p4, {deltaR})') \
.Define('L1Tau_Gen_type', '''GetTauTypes(genLeptons, L1Tau_genLepUniqueIdx, L1Tau_genLepIndices,
L1Tau_genJetUniqueIdx, false, true)''') \
.Define('L1Tau_Gen_p4', '''GetGenP4(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, L1Tau_genJetUniqueIdx,
genLeptons, GenJet_p4)''') \
.Define('L1Tau_Gen_pt', 'v_ops::pt(L1Tau_Gen_p4)') \
.Define('L1Tau_Gen_eta', 'v_ops::eta(L1Tau_Gen_p4)') \
.Define('L1Tau_Gen_abs_eta', 'abs(L1Tau_Gen_eta)') \
.Define('L1Tau_Gen_phi', 'v_ops::phi(L1Tau_Gen_p4)') \
.Define('L1Tau_Gen_mass', 'v_ops::mass(L1Tau_Gen_p4)') \
.Define('L1Tau_Gen_charge', 'GetGenCharge(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, genLeptons)') \
.Define('L1Tau_Gen_partonFlavour', '''GetGenPartonFlavour(L1Tau_Gen_type, L1Tau_genJetUniqueIdx,
GenJet_partonFlavour)''') \
.Define('L1Tau_Gen_flightLength', 'GetGenFlightLength(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, genLeptons)') \
.Define('L1Tau_Gen_flightLength_rho', 'v_ops::rho(L1Tau_Gen_flightLength)') \
.Define('L1Tau_Gen_flightLength_phi', 'v_ops::phi(L1Tau_Gen_flightLength)') \
.Define('L1Tau_Gen_flightLength_z', 'v_ops::z(L1Tau_Gen_flightLength)')
return df




Loading