diff --git a/Analysis/interface/AnalysisTypes.h b/Analysis/interface/AnalysisTypes.h
index a9c4d5371c3..66b6af61b2d 100644
--- a/Analysis/interface/AnalysisTypes.h
+++ b/Analysis/interface/AnalysisTypes.h
@@ -49,11 +49,15 @@ ENUM_NAMES(GenQcdMatch) = {
     { GenQcdMatch::Gluon, "gen_gluon" }
 };
 
-enum class TauType { e = 0, mu = 1, tau = 2, jet = 3, emb_e = 4, emb_mu = 5, emb_tau = 6, emb_jet = 7, data = 8 };
+enum class TauType { e = 0, mu = 1, tau = 2, jet = 3, emb_e = 4, emb_mu = 5, emb_tau = 6, emb_jet = 7, data = 8,
+                     displaced_e = 9, displaced_mu = 10, displaced_tau = 11, displaced_jet = 12 };
 ENUM_NAMES(TauType) = {
     { TauType::e, "e" }, { TauType::mu, "mu" }, { TauType::tau, "tau" }, { TauType::jet, "jet" },
-    { TauType::emb_e, "emb_e" }, { TauType::emb_mu, "emb_mu" }, { TauType::emb_tau, "emb_tau" }, { TauType::emb_jet, "emb_jet" },
-    { TauType::data, "data" }
+    { TauType::emb_e, "emb_e" }, { TauType::emb_mu, "emb_mu" },
+    { TauType::emb_tau, "emb_tau" }, { TauType::emb_jet, "emb_jet" },
+    { TauType::data, "data" },
+    { TauType::displaced_e, "displaced_e" }, { TauType::displaced_mu, "displaced_mu" },
+    { TauType::displaced_tau, "displaced_tau" }, { TauType::displaced_jet, "displaced_jet" }
 };
 
 enum class SampleType { Data = 0, MC = 1, Embedded = 2 };
diff --git a/Preprocessing/HLT/AnalysisTools.h b/Preprocessing/HLT/AnalysisTools.h
new file mode 100644
index 00000000000..209fb4f19ec
--- /dev/null
+++ b/Preprocessing/HLT/AnalysisTools.h
@@ -0,0 +1,238 @@
+#pragma once
+
+#include <Math/VectorUtil.h>
+#include <ROOT/RVec.hxx>
+
+using LorentzVectorXYZ = ROOT::Math::LorentzVector<ROOT::Math::PxPyPzE4D<double>>;
+using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double>>;
+using LorentzVectorE = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiE4D<double>>;
+using RVecI = ROOT::VecOps::RVec<int>;
+using RVecS = ROOT::VecOps::RVec<size_t>;
+using RVecUC = ROOT::VecOps::RVec<unsigned char>;
+using RVecF = ROOT::VecOps::RVec<float>;
+using RVecB = ROOT::VecOps::RVec<bool>;
+using RVecVecI = ROOT::VecOps::RVec<RVecI>;
+using RVecLV = ROOT::VecOps::RVec<LorentzVectorM>;
+using RVecSetInt = ROOT::VecOps::RVec<std::set<int>>;
+
+template<typename T1, typename T2>
+auto DeltaPhi(T1 phi1, T2 phi2) -> decltype(phi2 - phi1) { return ROOT::Math::VectorUtil::Phi_mpi_pi(phi2 - phi1); }
+template<typename T1, typename T2>
+auto DeltaEta(T1 eta1, T2 eta2) -> decltype(eta2 - eta1) { return eta2 - eta1; }
+template<typename T1, typename T2, typename T3, typename T4>
+auto DeltaR(T1 eta1, T2 phi1, T3 eta2, T4 phi2) -> decltype(eta1 + phi1 + eta2 + phi2)
+{
+    const auto dphi = DeltaPhi(phi1, phi2);
+    const auto deta = DeltaEta(eta1, eta2);
+    return std::hypot(dphi, deta);
+}
+
+inline RVecS CreateIndexes(size_t vecSize)
+{
+    RVecS i(vecSize);
+    std::iota(i.begin(), i.end(), 0);
+    return i;
+}
+
+template<typename V, typename Cmp=std::greater<typename V::value_type>>
+RVecI ReorderObjects(const V& varToOrder, const RVecI& indices, size_t nMax=std::numeric_limits<size_t>::max(),
+                     const Cmp& cmp=Cmp())
+{
+    RVecI ordered_indices = indices;
+    std::sort(ordered_indices.begin(), ordered_indices.end(), [&](int a, int b) {
+        return cmp(varToOrder.at(a), varToOrder.at(b));
+    });
+    const size_t n = std::min(ordered_indices.size(), nMax);
+    ordered_indices.resize(n);
+    return ordered_indices;
+}
+
+template<typename T, size_t N = std::numeric_limits<T>::digits>
+std::string GetBinaryString(T x)
+{
+    std::bitset<N> bs(x);
+    std::ostringstream ss;
+    ss << bs;
+    return ss.str();
+}
+
+template<typename V1, typename V2, typename V3, typename V4>
+RVecLV GetP4(const V1& pt, const V2& eta, const V3& phi, const V4& mass, const RVecS& indices)
+{
+    const size_t N = pt.size();
+    assert(N == eta.size() && N == phi.size() && N == mass.size());
+    RVecLV p4;
+    p4.reserve(indices.size());
+    for(auto idx : indices) {
+        assert(idx < N);
+        p4.emplace_back(pt[idx], eta[idx], phi[idx], mass[idx]);
+    }
+    return p4;
+}
+
+template<typename V1, typename V2, typename V3, typename V4>
+RVecLV GetP4(const V1& pt, const V2& eta, const V3& phi, const V4& mass)
+{
+    return GetP4(pt, eta, phi, mass, CreateIndexes(pt.size()));
+}
+
+
+template<typename TIn>
+struct InToOut {
+    using type = TIn;
+};
+
+template<>
+struct InToOut<bool> {
+    using type = int;
+};
+
+template<>
+struct InToOut<unsigned char> {
+    using type = int;
+};
+
+template<typename TIn, typename TOut=typename InToOut<TIn>::type>
+ROOT::VecOps::RVec<TOut> GetVar(const RVecI& idx, const ROOT::VecOps::RVec<TIn>& var)
+{
+    ROOT::VecOps::RVec<TOut> result(idx.size(), TOut(0));
+    for(size_t i = 0; i < idx.size(); ++i) {
+        if(idx[i] >= 0)
+            result[i] = var[idx[i]];
+    }
+    return result;
+}
+
+template<typename TIn, typename TOut=typename InToOut<TIn>::type>
+ROOT::VecOps::RVec<std::vector<TOut>> GetVarVec(const RVecSetInt& matched, const ROOT::VecOps::RVec<TIn>& var)
+{
+    ROOT::VecOps::RVec<std::vector<TOut>> result(matched.size());
+    for(size_t i = 0; i < matched.size(); ++i) {
+        for(auto idx : matched[i]) {
+            result[i].push_back(var[idx]);
+        }
+    }
+    return result;
+}
+
+inline RVecB RemoveOverlaps(const RVecLV& obj_p4, const RVecB& pre_sel, const std::vector<RVecLV>& other_objects,
+                            size_t min_number_of_non_overlaps, double min_deltaR)
+{
+    RVecB result(pre_sel);
+    const double min_deltaR2 = std::pow(min_deltaR, 2);
+
+    const auto hasMinNumberOfNonOverlaps = [&](const LorentzVectorM& p4) {
+        size_t cnt = 0;
+        for(const auto& other_obj_col : other_objects) {
+            for(const auto& other_obj_p4 : other_obj_col) {
+                const double dR2 = ROOT::Math::VectorUtil::DeltaR2(p4, other_obj_p4);
+                if(dR2 > min_deltaR2) {
+                    ++cnt;
+                    if(cnt >= min_number_of_non_overlaps)
+                        return true;
+                }
+            }
+        }
+        return false;
+    };
+
+    for(size_t obj_idx = 0; obj_idx < obj_p4.size(); ++obj_idx) {
+        result[obj_idx] = pre_sel[obj_idx] && hasMinNumberOfNonOverlaps(obj_p4.at(obj_idx));
+    }
+    return result;
+}
+
+inline std::pair<int, double> FindMatching(const LorentzVectorM& target_p4, const RVecLV& ref_p4, double deltaR_thr)
+{
+    double deltaR_min = deltaR_thr;
+    int current_idx = -1;
+    for(int refIdx = 0; refIdx < static_cast<int>(ref_p4.size()); ++refIdx) {
+        const auto dR_targetRef = ROOT::Math::VectorUtil::DeltaR(target_p4, ref_p4.at(refIdx));
+        if(dR_targetRef < deltaR_min) {
+            deltaR_min = dR_targetRef;
+            current_idx = refIdx;
+        }
+    }
+    return std::make_pair(current_idx, deltaR_min);
+}
+
+inline RVecI FindBestMatching(const RVecLV& target_p4, const RVecLV& ref_p4, double deltaR_thr)
+{
+    RVecI targetIndices(target_p4.size());
+    for(size_t targetIdx = 0; targetIdx < target_p4.size(); ++targetIdx) {
+        const auto match = FindMatching(target_p4[targetIdx], ref_p4, deltaR_thr);
+        targetIndices[targetIdx] = match.first;
+    }
+    return targetIndices;
+}
+
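+// In FindUniqueMatching below, references are ranked by pt, then mass, then |eta|; each reference greedily
+// takes its closest target, and a lower-ranked reference whose best target was already claimed loses its
+// match, so every target index is assigned to at most one reference.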
+inline RVecI FindUniqueMatching(const RVecLV& target_p4, const RVecLV& ref_p4, double deltaR_thr)
+{
+    RVecI targetIndices(target_p4.size(), -1);
+    const auto default_matching = std::make_pair(-1, deltaR_thr);
+    std::vector<std::pair<int, double>> matchings(ref_p4.size(), default_matching);
+    auto refIndices = CreateIndexes(ref_p4.size());
+    std::sort(refIndices.begin(), refIndices.end(), [&](int a, int b) {
+        if(ref_p4[a].pt() != ref_p4[b].pt()) return ref_p4[a].pt() > ref_p4[b].pt();
+        if(ref_p4[a].mass() != ref_p4[b].mass()) return ref_p4[a].mass() > ref_p4[b].mass();
+        const auto a_eta = std::abs(ref_p4[a].eta());
+        const auto b_eta = std::abs(ref_p4[b].eta());
+        if(a_eta != b_eta) return a_eta < b_eta;
+        const auto a_dir = std::make_pair(ref_p4[a].eta(), ref_p4[a].phi());
+        const auto b_dir = std::make_pair(ref_p4[b].eta(), ref_p4[b].phi());
+        return a_dir < b_dir;
+    });
+    for(int refIdx : refIndices) {
+        matchings[refIdx] = FindMatching(ref_p4[refIdx], target_p4, deltaR_thr);
+        for(int prevIdx : refIndices) {
+            if(prevIdx == refIdx) break;
+            if(matchings[prevIdx].first == matchings[refIdx].first) {
+                matchings[refIdx] = default_matching;
+                break;
+            }
+        }
+    }
+    for(int refIdx = 0; refIdx < static_cast<int>(ref_p4.size()); ++refIdx) {
+        if(matchings[refIdx].first >= 0)
+            targetIndices[matchings[refIdx].first] = refIdx;
+    }
+    return targetIndices;
+}
+
+inline RVecSetInt FindMatchingSet(const RVecLV& target_p4, const RVecLV& ref_p4, double dR_thr,
+                                  const RVecB& pre_sel_target, const RVecB& pre_sel_ref)
+{
+    assert(target_p4.size() == pre_sel_target.size());
+    assert(ref_p4.size() == pre_sel_ref.size());
+    RVecSetInt findMatching(pre_sel_target.size());
+    for(size_t ref_idx = 0; ref_idx < pre_sel_ref.size(); ++ref_idx) {
+        if(!pre_sel_ref[ref_idx]) continue;
+        for(size_t target_idx = 0; target_idx < pre_sel_target.size(); ++target_idx) {
+            if(!pre_sel_target[target_idx]) continue;
+            const auto dR = ROOT::Math::VectorUtil::DeltaR(target_p4[target_idx], ref_p4[ref_idx]);
+            if(dR < dR_thr)
+                findMatching[target_idx].insert(ref_idx);
+        }
+    }
+    return findMatching;
+}
+
+inline RVecSetInt FindMatchingSet(const RVecLV& target_p4, const RVecLV& ref_p4, double dR_thr)
+{
+    return FindMatchingSet(target_p4, ref_p4, dR_thr, RVecB(target_p4.size(), true), RVecB(ref_p4.size(), true));
+}
+
+namespace v_ops{
+    template<typename LV>
+    RVecF pt(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.pt(); }); }
+    template<typename LV>
+    RVecF eta(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.eta(); }); }
+    template<typename LV>
+    RVecF phi(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.phi(); }); }
+    template<typename LV>
+    RVecF mass(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.mass(); }); }
+    template<typename LV>
+    RVecF rho(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.rho(); }); }
+    template<typename LV>
+    RVecF z(const LV& p4) { return ROOT::VecOps::Map(p4, [](const auto& p4) -> float { return p4.z(); }); }
+}
\ No newline at end of file
diff --git a/Preprocessing/HLT/AnalysisTools.py b/Preprocessing/HLT/AnalysisTools.py
new file mode 100644
index 00000000000..6283681dd82
--- /dev/null
+++ b/Preprocessing/HLT/AnalysisTools.py
@@ -0,0 +1,109 @@
+import datetime
+from enum import Enum
+import ROOT
+import os
+
+class TauType(Enum):
+    other = -1
+    e = 0
+    mu = 1
+    tau = 2
+    jet = 3
+    emb_e = 4
+    emb_mu = 5
+    emb_tau = 6
+    emb_jet = 7
+    data = 8
+    displaced_e = 9
+    displaced_mu = 10
+    displaced_tau = 11
+    displaced_jet = 12
+
+def ListToVector(list, type="string"):
+    vec = ROOT.std.vector(type)()
+    for item in list:
+        vec.push_back(item)
+    return vec
+
+def GenerateEnumCppString(cls):
+    enum_string = f"enum class {cls.__name__} : int {{\n"
+    for item in cls:
+        enum_string += f"    {item.name} = {item.value},\n"
+    enum_string += "};"
+    return enum_string
+
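+# For example, GenerateEnumCppString(TauType) returns a C++ snippet of the form
+#   enum class TauType : int {
+#     other = -1,
+#     e = 0,
+#     ...
+#   };
+# which PrepareRootEnv() below declares to the ROOT interpreter.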
+def PrepareRootEnv():
+    headers_dir = os.path.dirname(os.path.abspath(__file__))
+    enums = [ TauType ]
+    headers = [ 'AnalysisTools.h', 'GenLepton.h', 'GenTools.h', 'TupleMaker.h' ]
+    for cls in enums:
+        cls_str = GenerateEnumCppString(cls)
+        if not ROOT.gInterpreter.Declare(cls_str):
+            raise RuntimeError(f'Failed to declare {cls.__name__}')
+    for header in headers:
+        header_path = os.path.join(headers_dir, header)
+        if not ROOT.gInterpreter.Declare(f'#include "{header_path}"'):
+            raise RuntimeError(f'Failed to load {header_path}')
+
+def timestamp_str():
+    t = datetime.datetime.now()
+    t_str = t.strftime('%Y-%m-%d %H:%M:%S')
+    return f'[{t_str}] '
+
+def MakeFileList(input):
+    file_list = []
+    if isinstance(input, str):
+        input = input.split(',')
+    for item in input:
+        if not os.path.exists(item):
+            raise RuntimeError(f'File or directory {item} does not exist')
+        if os.path.isdir(item):
+            for root, dirs, files in os.walk(item):
+                for file in files:
+                    if file.endswith('.root'):
+                        file_list.append(os.path.join(root, file))
+        else:
+            file_list.append(item)
+    return sorted(file_list)
+
+def ApplyCommonDefinitions(df, deltaR=0.4, isData=False):
+    df = df.Define("genLeptons", """reco_tau::gen_truth::GenLepton::fromNanoAOD(
+                        GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass,
+                        GenPart_genPartIdxMother, GenPart_pdgId, GenPart_statusFlags,
+                        GenPart_vx, GenPart_vy, GenPart_vz, event)""") \
+           .Define('L1Tau_mass', 'RVecF(L1Tau_pt.size(), 0.)') \
+           .Define('L1Tau_p4', 'GetP4(L1Tau_pt, L1Tau_eta, L1Tau_phi, L1Tau_mass)') \
+           .Define('Jet_p4', 'GetP4(Jet_pt, Jet_eta, Jet_phi, Jet_mass)') \
+           .Define('Tau_p4', 'GetP4(Tau_pt, Tau_eta, Tau_phi, Tau_mass)') \
+           .Define('L1Tau_jetIdx', f'FindUniqueMatching(L1Tau_p4, Jet_p4, {deltaR})') \
+           .Define('L1Tau_tauIdx', f'FindUniqueMatching(L1Tau_p4, Tau_p4, {deltaR})')
+
+    if isData:
+        df = df.Define('L1Tau_Gen_type', 'RVecI(L1Tau_pt.size(), static_cast<int>(TauType::data))')
+    else:
+        df = df.Define('GenJet_p4', 'GetP4(GenJet_pt, GenJet_eta, GenJet_phi, GenJet_mass)') \
+               .Define('GenLepton_p4', 'v_ops::visibleP4(genLeptons)') \
+               .Define('L1Tau_genLepIndices', f'FindMatchingSet(L1Tau_p4, GenLepton_p4, {deltaR})') \
+               .Define('L1Tau_genLepUniqueIdx', f'FindUniqueMatching(L1Tau_p4, GenLepton_p4, {deltaR})') \
+               .Define('L1Tau_genJetUniqueIdx', f'FindUniqueMatching(L1Tau_p4, GenJet_p4, {deltaR})') \
+               .Define('L1Tau_Gen_type', '''GetTauTypes(genLeptons, L1Tau_genLepUniqueIdx, L1Tau_genLepIndices,
+                                                        L1Tau_genJetUniqueIdx, false, true)''') \
+               .Define('L1Tau_Gen_p4', '''GetGenP4(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, L1Tau_genJetUniqueIdx,
+                                                   genLeptons, GenJet_p4)''') \
+               .Define('L1Tau_Gen_pt', 'v_ops::pt(L1Tau_Gen_p4)') \
+               .Define('L1Tau_Gen_eta', 'v_ops::eta(L1Tau_Gen_p4)') \
+               .Define('L1Tau_Gen_abs_eta', 'abs(L1Tau_Gen_eta)') \
+               .Define('L1Tau_Gen_phi', 'v_ops::phi(L1Tau_Gen_p4)') \
+               .Define('L1Tau_Gen_mass', 'v_ops::mass(L1Tau_Gen_p4)') \
+               .Define('L1Tau_Gen_charge', 'GetGenCharge(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, genLeptons)') \
+               .Define('L1Tau_Gen_partonFlavour', '''GetGenPartonFlavour(L1Tau_Gen_type, L1Tau_genJetUniqueIdx,
+                                                                         GenJet_partonFlavour)''') \
+               .Define('L1Tau_Gen_flightLength', 'GetGenFlightLength(L1Tau_Gen_type, L1Tau_genLepUniqueIdx, genLeptons)') \
+               .Define('L1Tau_Gen_flightLength_rho', 'v_ops::rho(L1Tau_Gen_flightLength)') \
+               .Define('L1Tau_Gen_flightLength_phi', 'v_ops::phi(L1Tau_Gen_flightLength)') \
+               .Define('L1Tau_Gen_flightLength_z', 'v_ops::z(L1Tau_Gen_flightLength)')
+    return df
+
diff --git a/Preprocessing/HLT/EntryQueue.h b/Preprocessing/HLT/EntryQueue.h
new file mode 100644
index 00000000000..46f368886dc
--- /dev/null
+++ b/Preprocessing/HLT/EntryQueue.h
@@ -0,0 +1,300 @@
+/*! Definition of a thread-safe fixed size entry queue.
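+Illustrative usage (a sketch; process() is a placeholder): a producer pushes until its input is exhausted and
+then calls SetInputDepleted(); a consumer loops on Pop(), which returns false once the queue is empty and the
+input is depleted:
+    analysis::EntryQueue<int> queue(16);
+    std::thread producer([&] { for(int i = 0; i < 100; ++i) queue.Push(i); queue.SetInputDepleted(); });
+    int x;
+    while(queue.Pop(x)) process(x);
+    producer.join();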
+*/
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+
+namespace analysis {
+
+template<typename _Entry>
+class EntryQueue {
+public:
+    using Entry = _Entry;
+    using Queue = std::queue<Entry>;
+    using Mutex = std::mutex;
+    using Lock = std::unique_lock<Mutex>;
+    using CondVar = std::condition_variable;
+
+public:
+    explicit EntryQueue(size_t max_size, size_t max_entries = std::numeric_limits<size_t>::max())
+        : max_size_(max_size), max_entries_(max_entries), n_entries_(0), input_depleted_(false), output_depleted_(false)
+    {
+    }
+
+    bool Push(const Entry& entry)
+    {
+        bool entry_is_pushed = false;
+        {
+            Lock lock(mutex_);
+            cond_var_.wait(lock, [&] { return queue_.size() < max_size_ || n_entries_ >= max_entries_ || output_depleted_; });
+            if(!(n_entries_ >= max_entries_ || output_depleted_)) {
+                queue_.push(entry);
+                ++n_entries_;
+                entry_is_pushed = true;
+            }
+        }
+        cond_var_.notify_all();
+        return entry_is_pushed;
+    }
+
+    bool Pop(Entry& entry)
+    {
+        bool entry_is_valid = false;
+        {
+            Lock lock(mutex_);
+            cond_var_.wait(lock, [&] { return !queue_.empty() || input_depleted_; });
+            if(!queue_.empty()) {
+                entry = queue_.front();
+                entry_is_valid = true;
+                queue_.pop();
+            }
+        }
+        cond_var_.notify_all();
+        return entry_is_valid;
+    }
+
+    void SetInputDepleted()
+    {
+        {
+            Lock lock(mutex_);
+            input_depleted_ = true;
+        }
+        cond_var_.notify_all();
+    }
+
+    void SetOutputDepleted()
+    {
+        {
+            Lock lock(mutex_);
+            output_depleted_ = true;
+        }
+        cond_var_.notify_all();
+    }
+
+private:
+    Queue queue_;
+    const size_t max_size_, max_entries_;
+    size_t n_entries_;
+    bool input_depleted_, output_depleted_;
+    Mutex mutex_;
+    CondVar cond_var_;
+};
+
+struct QueueState {
+    size_t max_size, max_entries, n_entries, waiting_for_entry, n_current_max_size_reached, n_max_size_reached_total,
+           n_zero_size_reached, queue_size;
+    bool input_depleted, output_depleted;
+
+    QueueState(size_t max_size_, size_t max_entries_)
+        : max_size(max_size_), max_entries(max_entries_), n_entries(0), waiting_for_entry(0), n_current_max_size_reached(0),
+          n_max_size_reached_total(0), n_zero_size_reached(0), queue_size(0), input_depleted(false), output_depleted(false)
+    {
+    }
+};
+
+template<typename _Entry>
+class MultiSlotEntryQueue {
+public:
+    using Entry = _Entry;
+    using Queue = std::queue<Entry>;
+    using Mutex = std::mutex;
+    using Lock = std::unique_lock<Mutex>;
+    using CondVar = std::condition_variable;
+
+    struct QueueDesc {
+        QueueState state;
+        Queue queue;
+        QueueDesc(size_t max_size_, size_t max_entries_) : state(max_size_, max_entries_) {}
+    };
+
+    enum class PushResult {
+        EntryPushedToEmptyQueue, EntryPushedToNonEmptyQueue, MaxSizeReached, MaxEntriesReached, OutputDepleted,
+    };
+
+public:
+    size_t AddQueue(size_t max_size, size_t max_entries = std::numeric_limits<size_t>::max())
+    {
+        size_t slot_id;
+        {
+            Lock lock(mutex_);
+            queues_.emplace_back(std::make_unique<QueueDesc>(max_size, max_entries));
+            slot_id = queues_.size() - 1;
+        }
+        cond_var_.notify_all();
+        return slot_id;
+    }
+
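+    // Push() below also returns (reporting MaxSizeReached) when any slot has a waiting consumer, so a
+    // producer blocked on one full slot cannot starve consumers waiting on the other slots.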
+    PushResult Push(size_t slot_id, const Entry& entry)
+    {
+        PushResult push_result;
+        {
+            Lock lock(mutex_);
+            CheckSlotId(slot_id);
+            cond_var_.wait(lock, [&] {
+                const auto& desc = *queues_.at(slot_id);
+                const auto& state = desc.state;
+                if(desc.queue.size() < state.max_size || state.n_entries >= state.max_entries || state.output_depleted)
+                    return true;
+                for(const auto& queue_desc : queues_) {
+                    if(queue_desc->state.waiting_for_entry > 0)
+                        return true;
+                }
+                return false;
+            });
+            auto& desc = *queues_.at(slot_id);
+            auto& state = desc.state;
+            if(desc.queue.size() >= state.max_size) {
+                push_result = PushResult::MaxSizeReached;
+                ++state.n_current_max_size_reached;
+                ++state.n_max_size_reached_total;
+            } else if(state.n_entries >= state.max_entries) {
+                push_result = PushResult::MaxEntriesReached;
+            } else if(state.output_depleted) {
+                push_result = PushResult::OutputDepleted;
+            } else {
+                if(desc.queue.empty()) {
+                    ++state.n_zero_size_reached;
+                    push_result = PushResult::EntryPushedToEmptyQueue;
+                } else {
+                    push_result = PushResult::EntryPushedToNonEmptyQueue;
+                }
+                desc.queue.push(entry);
+                ++state.n_entries;
+            }
+        }
+        cond_var_.notify_all();
+        return push_result;
+    }
+
+    bool Pop(size_t slot_id, Entry& entry)
+    {
+        bool entry_is_valid = false, need_to_wait = true;
+        const auto check_condition = [&]() {
+            return !queues_.at(slot_id)->queue.empty() || queues_.at(slot_id)->state.input_depleted;
+        };
+        const auto pop_entry = [&]() {
+            auto& desc = *queues_.at(slot_id);
+            if(!desc.queue.empty()) {
+                entry = desc.queue.front();
+                entry_is_valid = true;
+                desc.queue.pop();
+            }
+            --desc.state.waiting_for_entry;
+        };
+        {
+            Lock lock(mutex_);
+            CheckSlotId(slot_id);
+            ++queues_.at(slot_id)->state.waiting_for_entry;
+            if(check_condition()) {
+                pop_entry();
+                need_to_wait = false;
+            }
+        }
+        if(need_to_wait) {
+            cond_var_.notify_all();
+            Lock lock(mutex_);
+            cond_var_.wait(lock, check_condition);
+            pop_entry();
+        }
+        cond_var_.notify_all();
+        return entry_is_valid;
+    }
+
+    void SetInputDepleted(size_t slot_id)
+    {
+        {
+            Lock lock(mutex_);
+            CheckSlotId(slot_id);
+            queues_.at(slot_id)->state.input_depleted = true;
+        }
+        cond_var_.notify_all();
+    }
+
+    void SetInputDepleted()
+    {
+        {
+            Lock lock(mutex_);
+            for(auto& desc : queues_)
+                desc->state.input_depleted = true;
+        }
+        cond_var_.notify_all();
+    }
+
+
+    void SetOutputDepleted(size_t slot_id)
+    {
+        {
+            Lock lock(mutex_);
+            CheckSlotId(slot_id);
+            queues_.at(slot_id)->state.output_depleted = true;
+        }
+        cond_var_.notify_all();
+    }
+
+    void SetOutputDepleted()
+    {
+        {
+            Lock lock(mutex_);
+            for(auto& desc : queues_)
+                desc->state.output_depleted = true;
+        }
+        cond_var_.notify_all();
+    }
+
+    bool IsMaxEntriesReachedForAll()
+    {
+        Lock lock(mutex_);
+        for(const auto& desc : queues_) {
+            if(desc->state.n_entries < desc->state.max_entries)
+                return false;
+        }
+        return true;
+    }
+
+    QueueState GetQueueState(size_t slot_id)
+    {
+        Lock lock(mutex_);
+        CheckSlotId(slot_id);
+        QueueState state = queues_.at(slot_id)->state;
+        state.queue_size = queues_.at(slot_id)->queue.size();
+        return state;
+    }
+
+    void SetMaxSize(size_t slot_id, size_t max_size)
+    {
+        {
+            Lock lock(mutex_);
+            CheckSlotId(slot_id);
+            queues_.at(slot_id)->state.max_size = max_size;
+            queues_.at(slot_id)->state.n_current_max_size_reached = 0;
+        }
+        cond_var_.notify_all();
+    }
+
+private:
+    void CheckSlotId(size_t slot_id) const
+    {
+        if(slot_id >= queues_.size())
+            throw std::runtime_error("MultiSlotEntryQueue: slot ID out of range");
+    }
+
+private:
+    std::vector<std::unique_ptr<QueueDesc>> queues_;
+    Mutex mutex_;
+    CondVar cond_var_;
+};
+
+inline std::ostream& operator<<(std::ostream& out, const QueueState& state)
+{
+    out << "max_size=" << state.max_size << ", max_entries=" << state.max_entries << ", n_entries=" << state.n_entries
+        << ", waiting_for_entry=" << state.waiting_for_entry
+        << ", n_current_max_size_reached=" << state.n_current_max_size_reached
+        << ", n_max_size_reached_total=" << state.n_max_size_reached_total
+        << ", n_zero_size_reached=" << state.n_zero_size_reached << ", queue_size=" << state.queue_size
+        << ", input_depleted=" << state.input_depleted << ", output_depleted=" << state.output_depleted;
+    return out;
+}
+
+} // namespace analysis
diff --git a/Preprocessing/HLT/GenLepton.h b/Preprocessing/HLT/GenLepton.h
new file mode 100644
index 00000000000..d9efccdf27c
--- /dev/null
+++ b/Preprocessing/HLT/GenLepton.h
@@ -0,0 +1,637 @@
+/*! Definition of lepton (ele, mu or tau) at the generator level.
+Author: Konstantin Androsov
+*/
+
+#pragma once
+
+#include <ROOT/RVec.hxx>
+
+#include <iostream>
+#include <map>
+#include <set>
+#include "GenStatusFlags.h"
+
+namespace reco_tau {
+namespace gen_truth {
+
+using LorentzVectorXYZ = ROOT::Math::LorentzVector<ROOT::Math::PxPyPzE4D<double>>;
+using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double>>;
+using Point3D = ROOT::Math::PositionVector3D<ROOT::Math::Cartesian3D<double>>;
+using Displacement3D = ROOT::Math::DisplacementVector3D<ROOT::Math::Cartesian3D<double>>;
+
+class GenParticle {
+public:
+    enum class PdgId {
+        electron = 11, electron_neutrino = 12, muon = 13, muon_neutrino = 14, tau = 15, tau_neutrino = 16,
+        pi0 = 111, pi = 211, K0_L = 130, K0_S = 310, K0 = 311, K = 321, a1_1260_plus = 20213,
+        down = 1, up = 2, strange = 3, charm = 4, bottom = 5, top = 6,
+        gluon = 21, photon = 22, Z = 23, W = 24, h0 = 25
+    };
+
+    static const std::set<PdgId>& gluonQuarks() {
+        static const std::set<PdgId> s = { PdgId::gluon, PdgId::down, PdgId::up, PdgId::strange, PdgId::charm,
+                                           PdgId::bottom, PdgId::top };
+        return s;
+    }
+
+    static const std::set<PdgId>& ChargedLeptons() {
+        static const std::set<PdgId> s = { PdgId::electron, PdgId::muon, PdgId::tau };
+        return s;
+    }
+
+    static const std::set<PdgId>& NeutralLeptons() {
+        static const std::set<PdgId> s = { PdgId::electron_neutrino, PdgId::muon_neutrino, PdgId::tau_neutrino };
+        return s;
+    }
+
+    static const std::set<PdgId>& ChargedHadrons() {
+        static const std::set<PdgId> s = { PdgId::pi, PdgId::K };
+        return s;
+    }
+
+    static const std::set<PdgId>& NeutralHadrons() {
+        static const std::set<PdgId> s = { PdgId::pi0, PdgId::K0_L, PdgId::K0_S, PdgId::K0 };
+        return s;
+    }
+
+    static double GetMass(PdgId pdgId, double nanoAODmass) {
+        static const std::map<PdgId, double> pdgId_Masses {
+            {PdgId::electron, 0.0005109989461},
+            {PdgId::electron_neutrino, 0.},
+            {PdgId::muon, 0.1056583745},
+            {PdgId::muon_neutrino, 0.},
+            {PdgId::tau, 1.77686},
+            {PdgId::tau_neutrino, 0.},
+            {PdgId::photon, 0.},
+            {PdgId::pi, 0.13957039},
+            {PdgId::pi0, 0.1349768},
+            {PdgId::K, 0.493677},
+            {PdgId::K0, 0.497611},
+        };
+        auto iter = pdgId_Masses.find(pdgId);
+        if(iter == pdgId_Masses.end()) return nanoAODmass;
+        return iter->second;
+    }
+
+    int getCharge() const {
+        static const std::map<PdgId, int> pdgId_charges {
+            {PdgId::electron, -1},
+            {PdgId::electron_neutrino, 0},
+            {PdgId::muon, -1},
+            {PdgId::muon_neutrino, 0},
+            {PdgId::tau, -1},
+            {PdgId::tau_neutrino, 0},
+            {PdgId::photon, 0},
+            {PdgId::pi, +1},
+            {PdgId::pi0, 0},
+            {PdgId::K, +1},
+            {PdgId::K0, 0},
+            {PdgId::K0_S, 0},
+            {PdgId::K0_L, 0},
+            {PdgId::h0, 0},
+            {PdgId::Z, 0},
+            {PdgId::W, 1},
+        };
+        auto iter = pdgId_charges.find(pdgCode());
+        if(iter == pdgId_charges.end()) return std::numeric_limits<int>::max();
+        int charge = iter->second;
+        if(pdgId < 0)
+            charge *= -1;
+        return charge;
+    }
+
+    int pdgId{0};
+    int charge{0};
+    bool isFirstCopy{false}, isLastCopy{false};
+    bool hasMissingDaughters{false};
+    LorentzVectorM p4;
+    Point3D vertex;
+    std::set<const GenParticle*> mothers;
+    std::set<const GenParticle*> daughters;
+
+    PdgId pdgCode() const { return static_cast<PdgId>(std::abs(pdgId)); }
+};
+
vrho=" << p.vertex.rho() + << " vr=" << p.vertex.r() << " q=" << p.charge; + return os; +} + +class GenLepton { +public: + enum class Kind { PromptElectron = 1, PromptMuon = 2, TauDecayedToElectron = 3, TauDecayedToMuon = 4, + TauDecayedToHadrons = 5, Other = 6 }; + + static constexpr size_t MaxNumberOfParticles = 1000; + + template + static std::vector fromGenParticleCollection(const std::vector& gen_particles) + { + std::vector leptons; + std::map processed_particles; + for(const auto& particle : gen_particles) { + if(processed_particles.count(&particle)) continue; + if(!(particle.statusFlags().isPrompt() && particle.statusFlags().isFirstCopy())) continue; + const int abs_pdg = std::abs(particle.pdgId()); + if(!GenParticle::ChargedLeptons().count(static_cast(abs_pdg))) + continue; + + GenLepton lepton; + FillImpl fillImpl(lepton, processed_particles); + fillImpl.FillAll(&particle); + lepton.initialize(); + + leptons.push_back(lepton); + } + return leptons; + } + template + static ROOT::VecOps::RVec fromNanoAOD(const FloatVector& GenPart_pt, + const FloatVector& GenPart_eta, + const FloatVector& GenPart_phi, + const FloatVector& GenPart_mass, + const IntVector1& GenPart_genPartIdxMother, + const IntVector2& GenPart_pdgId, + const IntVector3& GenPart_statusFlags, + const FloatVector& GenPart_vx = FloatVector(), + const FloatVector& GenPart_vy = FloatVector(), + const FloatVector& GenPart_vz = FloatVector(), + int event = 0) { + try { + std::vector genLeptons; + std::set processed_particles; + for(size_t genPart_idx=0; genPart_idx(abs_pdg))) + continue; + GenLepton lepton; + FillImplNano fillImplNano( + lepton, processed_particles, GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass, + GenPart_genPartIdxMother, GenPart_pdgId, GenPart_statusFlags, GenPart_vx, GenPart_vy, GenPart_vz); + fillImplNano.FillAll(genPart_idx); + lepton.initialize(); + genLeptons.push_back(lepton); + } + return genLeptons; + } catch(std::runtime_error& e) { + std::cerr << "Event id = " << event << std::endl; + throw; + } + } + template + static GenLepton fromRootTuple(int lastMotherIndex, + const IntVector& genParticle_pdgId, + const LongVector& genParticle_mother, + const IntVector& genParticle_charge, + const IntVector& genParticle_isFirstCopy, + const IntVector& genParticle_isLastCopy, + const FloatVector& genParticle_pt, + const FloatVector& genParticle_eta, + const FloatVector& genParticle_phi, + const FloatVector& genParticle_mass, + const FloatVector& genParticle_vtx_x, + const FloatVector& genParticle_vtx_y, + const FloatVector& genParticle_vtx_z) + { + try { + + const size_t N = genParticle_pdgId.size(); + assert(N <= MaxNumberOfParticles); + assert(genParticle_mother.size() == N); + assert(genParticle_charge.size() == N); + assert(genParticle_isFirstCopy.size() == N); + assert(genParticle_isLastCopy.size() == N); + assert(genParticle_pt.size() == N); + assert(genParticle_eta.size() == N); + assert(genParticle_phi.size() == N); + assert(genParticle_mass.size() == N); + assert(genParticle_vtx_x.size() == N); + assert(genParticle_vtx_y.size() == N); + assert(genParticle_vtx_z.size() == N); + assert(lastMotherIndex >= -1); + + GenLepton lepton; + lepton.particles_->resize(N); + lepton.firstCopy_ = &lepton.particles_->at(lastMotherIndex + 1); + for(size_t n = 0; n < N; ++n) { + GenParticle& p = lepton.particles_->at(n); + p.pdgId = genParticle_pdgId.at(n); + p.charge = genParticle_charge.at(n); + p.isFirstCopy = genParticle_isFirstCopy.at(n); + p.isLastCopy = genParticle_isLastCopy.at(n); + p.p4 = 
+                std::set<size_t> mothers;
+                Long64_t mother_encoded = genParticle_mother.at(n);
+                if(mother_encoded >= 0) {
+                    do {
+                        Long64_t mother_index = mother_encoded % static_cast<Long64_t>(MaxNumberOfParticles);
+                        mother_encoded = (mother_encoded - mother_index) / static_cast<Long64_t>(MaxNumberOfParticles);
+                        mothers.insert(static_cast<size_t>(mother_index));
+                    } while(mother_encoded > 0);
+                }
+                for(size_t mother_index : mothers) {
+                    assert(mother_index < N);
+                    p.mothers.insert(&lepton.particles_->at(mother_index));
+                    lepton.particles_->at(mother_index).daughters.insert(&p);
+                }
+            }
+            lepton.initialize();
+            return lepton;
+        } catch(std::exception& e) {
+            std::cerr << "ERROR: " << e.what() << std::endl;
+            throw;
+        }
+    }
+
+    static const GenParticle* findTerminalCopy(const GenParticle& genParticle, bool first)
+    {
+        const GenParticle* particle = &genParticle;
+        while((first && !particle->isFirstCopy) || (!first && !particle->isLastCopy)) {
+            bool nextCopyFound = false;
+            const auto& ref = first ? particle->mothers : particle->daughters;
+            for(const GenParticle* p : ref) {
+                if(p->pdgId == particle->pdgId) {
+                    particle = &(*p);
+                    nextCopyFound = true;
+                    break;
+                }
+            }
+            if(!nextCopyFound)
+                ThrowErrorStatic("unable to find a terminal copy.");
+        }
+        return particle;
+    }
+
+    const std::vector<GenParticle>& allParticles() const { return *particles_; }
+    const std::set<const GenParticle*>& mothers() const { return firstCopy_->mothers; }
+    const GenParticle& firstCopy() const { return *firstCopy_; }
+    const GenParticle& lastCopy() const { return *lastCopy_; }
+    Kind kind() const { return kind_; }
+    int charge() const { return firstCopy().charge; }
+    const std::set<const GenParticle*>& finalStateFromDecay() const { return finalStateFromDecay_; }
+    const std::set<const GenParticle*>& finalStateFromRadiation() const { return finalStateFromRadiation_; }
+    const std::set<const GenParticle*>& hadrons() const { return hadrons_; }
+    // Intermediate hadrons are hadrons that decayed hadronically
+    const std::set<const GenParticle*>& intermediateHadrons() const { return intermediateHadrons_; }
+    const std::set<const GenParticle*>& otherParticles() const { return other_; }
+
+    const LorentzVectorXYZ& visibleP4() const { return visibleP4_; }
+    const LorentzVectorXYZ& radiatedP4() const { return radiatedP4_; }
+
+    size_t nChargedHadrons() const { return nChargedHadrons_; }
+    size_t nNeutralHadrons() const { return nNeutralHadrons_; }
+    size_t nFinalStateElectrons() const { return nFinalStateElectrons_; }
+    size_t nFinalStateMuons() const { return nFinalStateMuons_; }
+    size_t nFinalStateNeutrinos() const { return nFinalStateNeutrinos_; }
+
+    const Point3D vertex() const { return firstCopy().vertex; }
+    const Displacement3D flightLength() const
+    {
+        if(mothers().empty())
+            return Displacement3D(0., 0., 0.);
+        return vertex() - (*mothers().begin())->vertex;
+    }
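+    // flightLength() is the displacement of the first-copy production vertex with respect to the production
+    // vertex of its first mother; GetTauTypes() in GenTools.h cuts on its rho and z components to tag
+    // displaced leptons.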
+    void PrintDecay(const GenParticle& particle, const std::string& pre, std::ostream& os) const
+    {
+        os << particle << std::endl;
+
+        for(auto d_iter = particle.daughters.begin(); d_iter != particle.daughters.end(); ++d_iter) {
+            const GenParticle& daughter = **d_iter;
+            os << pre << "+-> ";
+            const char pre_first = std::next(d_iter) == particle.daughters.end() ? ' ' : '|';
+            const std::string pre_d = pre + pre_first + " ";
+            PrintDecay(daughter, pre_d, os);
+        }
+    }
+
+    void PrintDecay(std::ostream& os) const
+    {
+        PrintDecay(firstCopy(), "", os);
+    }
+
+    // Keeping the default constructor public to stay compatible with RDataFrame
+    GenLepton() : particles_(std::make_shared<std::vector<GenParticle>>()) {}
+
+private:
+    template<typename GenParticleT>
+    struct FillImpl {
+        static constexpr size_t NoneIndex = std::numeric_limits<size_t>::max();
+
+        GenLepton& lepton_;
+        std::map<const GenParticleT*, int>& processedParticles_;
+        std::map<size_t, std::set<size_t>> relations_;
+
+        FillImpl(GenLepton& lepton, std::map<const GenParticleT*, int>& processedParticles) :
+            lepton_(lepton), processedParticles_(processedParticles)
+        {
+        }
+
+        void FillAll(const GenParticleT* particle)
+        {
+            size_t last_mother_index = NoneIndex;
+
+            if(!particle->motherRefVector().empty()) {
+                for(const auto& mother : particle->motherRefVector())
+                    FillDaughters(mother.get(), NoneIndex, false);
+                last_mother_index = particle->motherRefVector().size() - 1;
+            }
+
+            FillDaughters(particle, last_mother_index, true);
+
+            if(last_mother_index != NoneIndex) {
+                lepton_.firstCopy_ = &lepton_.particles_->at(last_mother_index + 1);
+                for(size_t mother_index = 0; mother_index <= last_mother_index; ++mother_index) {
+                    lepton_.particles_->at(last_mother_index + 1).mothers.insert(&lepton_.particles_->at(mother_index));
+                    lepton_.particles_->at(mother_index).daughters.insert(lepton_.firstCopy_);
+                }
+            } else {
+                lepton_.firstCopy_ = &lepton_.particles_->at(0);
+            }
+
+            for(const auto& [mother, daughters] : relations_) {
+                for(size_t daughter : daughters) {
+                    lepton_.particles_->at(mother).daughters.insert(&lepton_.particles_->at(daughter));
+                    lepton_.particles_->at(daughter).mothers.insert(&lepton_.particles_->at(mother));
+                }
+            }
+        }
+
+        void FillDaughters(const GenParticleT* p, size_t mother_index, bool fill_recursively)
+        {
+            if(fill_recursively) {
+                if(processedParticles_.count(p)) {
+                    const int proc_p_index = processedParticles_.at(p);
+                    if(proc_p_index >= 0)
+                        relations_[mother_index].insert(static_cast<size_t>(proc_p_index));
+                    return;
+                    // ThrowErrorStatic("particle has already been processed.");
+                }
+            }
+
+            GenParticle output_p;
+            output_p.pdgId = p->pdgId();
+            output_p.charge = p->charge();
+            output_p.isFirstCopy = p->statusFlags().isFirstCopy();
+            output_p.isLastCopy = p->statusFlags().isLastCopy();
+            output_p.p4 = p->p4();
+            output_p.vertex = p->vertex();
+
+            size_t p_index = lepton_.particles_->size();
+            if(mother_index != NoneIndex)
+                relations_[mother_index].insert(p_index);
+
+            lepton_.particles_->push_back(output_p);
+
+            if(fill_recursively) {
+                processedParticles_[p] = static_cast<int>(p_index);
+                for(auto d : p->daughterRefVector())
+                    FillDaughters(&*d, p_index, true);
+            }
+        }
+    };
+
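+    // FillImplNano mirrors FillImpl, but walks the flat nanoAOD GenPart arrays via GenPart_genPartIdxMother
+    // indices instead of EDM mother/daughter references.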
+    template<typename FloatVector, typename IntVector1, typename IntVector2, typename IntVector3>
+    struct FillImplNano {
+        static constexpr size_t NoneIndex = std::numeric_limits<size_t>::max();
+
+        GenLepton& lepton_;
+        std::set<size_t>& processedParticles_;
+        std::map<size_t, std::set<size_t>> relations_;
+        const FloatVector& GenPart_pt_;
+        const FloatVector& GenPart_eta_;
+        const FloatVector& GenPart_phi_;
+        const FloatVector& GenPart_mass_;
+        const IntVector1& GenPart_genPartIdxMother_;
+        const IntVector2& GenPart_pdgId_;
+        const IntVector3& GenPart_statusFlags_;
+        const FloatVector& GenPart_vx_;
+        const FloatVector& GenPart_vy_;
+        const FloatVector& GenPart_vz_;
+
+        FillImplNano(GenLepton& lepton, std::set<size_t>& processedParticles,
+                     const FloatVector& GenPart_pt,
+                     const FloatVector& GenPart_eta,
+                     const FloatVector& GenPart_phi,
+                     const FloatVector& GenPart_mass,
+                     const IntVector1& GenPart_genPartIdxMother,
+                     const IntVector2& GenPart_pdgId,
+                     const IntVector3& GenPart_statusFlags,
+                     const FloatVector& GenPart_vx,
+                     const FloatVector& GenPart_vy,
+                     const FloatVector& GenPart_vz) :
+            lepton_(lepton), processedParticles_(processedParticles),
+            GenPart_pt_(GenPart_pt), GenPart_eta_(GenPart_eta), GenPart_phi_(GenPart_phi),
+            GenPart_mass_(GenPart_mass), GenPart_genPartIdxMother_(GenPart_genPartIdxMother),
+            GenPart_pdgId_(GenPart_pdgId), GenPart_statusFlags_(GenPart_statusFlags),
+            GenPart_vx_(GenPart_vx), GenPart_vy_(GenPart_vy), GenPart_vz_(GenPart_vz)
+        {
+        }
+
+        void FillAll(size_t partIdx)
+        {
+            size_t last_mother_index = NoneIndex;
+
+            if(GenPart_genPartIdxMother_.at(partIdx) >= 0) {
+                FillDaughters(GenPart_genPartIdxMother_.at(partIdx), NoneIndex, false);
+                last_mother_index = 0;
+            }
+            FillDaughters(partIdx, last_mother_index, true);
+
+            if(last_mother_index != NoneIndex) {
+                lepton_.firstCopy_ = &lepton_.particles_->at(last_mother_index + 1);
+                for(size_t mother_index = 0; mother_index <= last_mother_index; ++mother_index) {
+                    lepton_.particles_->at(last_mother_index + 1).mothers.insert(&lepton_.particles_->at(mother_index));
+                    lepton_.particles_->at(mother_index).daughters.insert(lepton_.firstCopy_);
+                }
+            } else {
+                lepton_.firstCopy_ = &lepton_.particles_->at(0);
+            }
+
+            for(const auto& [mother, daughters] : relations_) {
+                for(size_t daughter : daughters) {
+                    lepton_.particles_->at(mother).daughters.insert(&lepton_.particles_->at(daughter));
+                    lepton_.particles_->at(daughter).mothers.insert(&lepton_.particles_->at(mother));
+                }
+            }
+        }
+
+        void FillDaughters(size_t part_idx, size_t mother_index, bool fill_recursively)
+        {
+            if(fill_recursively) {
+                if(processedParticles_.count(part_idx)) {
+                    ThrowErrorStatic("particle already processed!");
+                }
+            }
+
+            GenParticle output_p;
+            output_p.pdgId = GenPart_pdgId_.at(part_idx);
+            output_p.charge = output_p.getCharge();
+            GenStatusFlags output_pStatusFlags(GenPart_statusFlags_.at(part_idx));
+            output_p.isFirstCopy = output_pStatusFlags.isFirstCopy();
+            output_p.isLastCopy = output_pStatusFlags.isLastCopy();
+            double output_pMass = GenParticle::GetMass(output_p.pdgCode(), GenPart_mass_.at(part_idx));
+            output_p.p4 = LorentzVectorM(GenPart_pt_.at(part_idx), GenPart_eta_.at(part_idx), GenPart_phi_.at(part_idx), output_pMass);
+            if(GenPart_vx_.size() == GenPart_pt_.size())
+                output_p.vertex = Point3D(GenPart_vx_.at(part_idx), GenPart_vy_.at(part_idx), GenPart_vz_.at(part_idx));
+            else // no vertex information in nanoAOD
+                output_p.vertex = Point3D(0., 0., 0.);
+
+            size_t p_index = lepton_.particles_->size();
+            if(mother_index != NoneIndex)
+                relations_[mother_index].insert(p_index);
+
+            lepton_.particles_->push_back(output_p);
+            GenParticle& output_ref = lepton_.particles_->back();
+
+            if(fill_recursively) {
+                processedParticles_.insert(part_idx);
+
+                for(size_t daughter_idx = part_idx+1; daughter_idx < GenPart_pdgId_.size(); ++daughter_idx) {
+                    if(GenPart_genPartIdxMother_.at(daughter_idx) == static_cast<int>(part_idx)) {
+                        const int daughter_pdg = std::abs(GenPart_pdgId_.at(daughter_idx));
+                        if(GenParticle::gluonQuarks().count(static_cast<GenParticle::PdgId>(daughter_pdg))) {
+                            output_ref.hasMissingDaughters = true;
+                        } else {
+                            FillDaughters(daughter_idx, p_index, true);
+                        }
+                    }
+                }
+            }
+        }
+    };
+
+    void initialize()
+    {
+        if(particles_->empty())
+            ThrowError("unable to initialize from an empty particle tree.");
+
+        lastCopy_ = findTerminalCopy(*firstCopy_, false);
+        std::set<const GenParticle*> processed;
+        fillParticleCollections(*firstCopy_, false, processed);
+
+        kind_ = determineKind();
+    }
+
+    void fillParticleCollections(const GenParticle& particle, bool fromLastCopy,
+                                 std::set<const GenParticle*>& processed)
+    {
+        if(processed.count(&particle)) return;
+        processed.insert(&particle);
+        fromLastCopy = fromLastCopy || &particle == lastCopy_;
+        const bool isFinalState = particle.daughters.empty();
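+        // Final-state particles reached from the last copy are decay products (visible ones are summed into
+        // visibleP4_); final-state particles reached only before the last copy count as radiation (radiatedP4_).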
+        if(isFinalState && !particle.isLastCopy) {
+            std::cerr << "Inconsistent particle: " << particle << std::endl;
+            ThrowError("last copy flag is not set for a final state particle.");
+        }
+        if(particle.isLastCopy) {
+            const bool isChargedHadron = GenParticle::ChargedHadrons().count(particle.pdgCode());
+            const bool isNeutralHadron = GenParticle::NeutralHadrons().count(particle.pdgCode());
+            const bool isOther = !(isFinalState || isChargedHadron || isNeutralHadron);
+            if(isFinalState) {
+                auto& finalStateSet = fromLastCopy ? finalStateFromDecay_ : finalStateFromRadiation_;
+                finalStateSet.insert(&particle);
+
+                if(fromLastCopy) {
+                    if(GenParticle::NeutralLeptons().count(particle.pdgCode())) {
+                        ++nFinalStateNeutrinos_;
+                    } else {
+                        if(particle.pdgCode() == GenParticle::PdgId::electron)
+                            ++nFinalStateElectrons_;
+                        if(particle.pdgCode() == GenParticle::PdgId::muon)
+                            ++nFinalStateMuons_;
+                        visibleP4_ += particle.p4;
+                    }
+                } else {
+                    radiatedP4_ += particle.p4;
+                }
+            }
+            if(fromLastCopy && (isChargedHadron || isNeutralHadron)) {
+                hadrons_.insert(&particle);
+                bool isIntermediate = false;
+                for(auto d : particle.daughters) {
+                    if(!GenParticle::ChargedLeptons().count(d->pdgCode())
+                            && !GenParticle::NeutralLeptons().count(d->pdgCode())
+                            && d->pdgCode() != GenParticle::PdgId::photon) {
+                        isIntermediate = true;
+                        break;
+                    }
+                }
+                if(isIntermediate) {
+                    intermediateHadrons_.insert(&particle);
+                } else {
+                    size_t& nHad = isChargedHadron ? nChargedHadrons_ : nNeutralHadrons_;
+                    ++nHad;
+                }
+            }
+            if(isOther)
+                other_.insert(&particle);
+        }
+
+        for(const GenParticle* daughter : particle.daughters)
+            fillParticleCollections(*daughter, fromLastCopy, processed);
+    }
+
+    Kind determineKind() const
+    {
+        const auto pdg = lastCopy_->pdgCode();
+        if(pdg == GenParticle::PdgId::electron)
+            return Kind::PromptElectron;
+        if(pdg == GenParticle::PdgId::muon)
+            return Kind::PromptMuon;
+        if(pdg != GenParticle::PdgId::tau)
+            std::cerr << "pdg code = " << static_cast<int>(pdg) << std::endl;
+        if(nChargedHadrons_ == 0 && nNeutralHadrons_ != 0)
+            ThrowError("invalid hadron counts");
+        if(nChargedHadrons_ != 0)
+            return Kind::TauDecayedToHadrons;
+        if(nFinalStateElectrons_ == 1 && nFinalStateNeutrinos_ == 2)
+            return Kind::TauDecayedToElectron;
+        if(nFinalStateMuons_ == 1 && nFinalStateNeutrinos_ == 2)
+            return Kind::TauDecayedToMuon;
+        ThrowError("unable to determine gen lepton kind.");
+    }
+
+    [[noreturn]] void ThrowError(const std::string& message) const
+    {
+        if(particles_->size())
+            PrintDecay(std::cerr);
+        ThrowErrorStatic(message);
+    }
+
+    [[noreturn]] static void ThrowErrorStatic(const std::string& message)
+    {
+        throw std::runtime_error("GenLepton: " + message);
+    }
+
+private:
+    std::shared_ptr<std::vector<GenParticle>> particles_;
+    const GenParticle *firstCopy_{nullptr}, *lastCopy_{nullptr};
+    Kind kind_{Kind::Other};
+    std::set<const GenParticle*> finalStateFromDecay_, finalStateFromRadiation_, hadrons_, intermediateHadrons_, other_;
+    LorentzVectorXYZ visibleP4_, radiatedP4_;
+    size_t nChargedHadrons_{0}, nNeutralHadrons_{0}, nFinalStateElectrons_{0}, nFinalStateMuons_{0},
+           nFinalStateNeutrinos_{0};
+};
+
+} // namespace gen_truth
+} // namespace reco_tau
+
+namespace v_ops{
+inline ROOT::VecOps::RVec<reco_tau::gen_truth::LorentzVectorM> visibleP4(
+        const ROOT::VecOps::RVec<reco_tau::gen_truth::GenLepton>& leptons)
+{
+    return ROOT::VecOps::Map(leptons, [](const auto& lep) -> reco_tau::gen_truth::LorentzVectorM {
+        return reco_tau::gen_truth::LorentzVectorM(lep.visibleP4());
+    });
+}
+
+} // namespace v_ops
\ No newline at end of file
diff --git a/Preprocessing/HLT/GenStatusFlags.h b/Preprocessing/HLT/GenStatusFlags.h
new file mode 100644
index 00000000000..3cc459f1f29
--- /dev/null
+++ b/Preprocessing/HLT/GenStatusFlags.h
@@ -0,0 +1,118 @@
+/** \class reco::GenStatusFlags
+The original code can be found at https://github.com/cms-sw/cmssw/blob/master/DataFormats/HepMCCandidate/interface/GenStatusFlags.h
+ * enum for generator status flags */
+
+#pragma once
+
+#include <bitset>
+
+class GenStatusFlags {
+public:
+    GenStatusFlags() {}
+    GenStatusFlags(uint16_t flags_) :
+        flags(flags_) {}
+
+public:
+    enum StatusBits {
+        kIsPrompt = 0,
+        kIsDecayedLeptonHadron = 1,
+        kIsTauDecayProduct = 2,
+        kIsPromptTauDecayProduct = 3,
+        kIsDirectTauDecayProduct = 4,
+        kIsDirectPromptTauDecayProduct = 5,
+        kIsDirectHadronDecayProduct = 6,
+        kIsHardProcess = 7,
+        kFromHardProcess = 8,
+        kIsHardProcessTauDecayProduct = 9,
+        kIsDirectHardProcessTauDecayProduct = 10,
+        kFromHardProcessBeforeFSR = 11,
+        kIsFirstCopy = 12,
+        kIsLastCopy = 13,
+        kIsLastCopyBeforeFSR = 14
+    };
+
+    /////////////////////////////////////////////////////////////////////////////
+    //these are robust, generator-independent functions for categorizing
+    //mainly final state particles, but also intermediate hadrons/taus
+
+    //is particle prompt (not from hadron, muon, or tau decay)
+    bool isPrompt() const { return flags[kIsPrompt]; }
+    void setIsPrompt(bool b) { flags[kIsPrompt] = b; }
+
+    //is particle a decayed hadron, muon, or tau (does not include resonance decays like W,Z,Higgs,top,etc)
+    //This flag is equivalent to status 2 in the current HepMC standard
+    //but older generators (pythia6, herwig6) predate this and use status 2 also for other intermediate
+    //particles/states
+    bool isDecayedLeptonHadron() const { return flags[kIsDecayedLeptonHadron]; }
+    void setIsDecayedLeptonHadron(bool b) { flags[kIsDecayedLeptonHadron] = b; }
+
+    //this particle is a direct or indirect tau decay product
+    bool isTauDecayProduct() const { return flags[kIsTauDecayProduct]; }
+    void setIsTauDecayProduct(bool b) { flags[kIsTauDecayProduct] = b; }
+
+    //this particle is a direct or indirect decay product of a prompt tau
+    bool isPromptTauDecayProduct() const { return flags[kIsPromptTauDecayProduct]; }
+    void setIsPromptTauDecayProduct(bool b) { flags[kIsPromptTauDecayProduct] = b; }
+
+    //this particle is a direct tau decay product
+    bool isDirectTauDecayProduct() const { return flags[kIsDirectTauDecayProduct]; }
+    void setIsDirectTauDecayProduct(bool b) { flags[kIsDirectTauDecayProduct] = b; }
+
+    //this particle is a direct decay product from a prompt tau
+    bool isDirectPromptTauDecayProduct() const { return flags[kIsDirectPromptTauDecayProduct]; }
+    void setIsDirectPromptTauDecayProduct(bool b) { flags[kIsDirectPromptTauDecayProduct] = b; }
+
+    //this particle is a direct decay product from a hadron
+    bool isDirectHadronDecayProduct() const { return flags[kIsDirectHadronDecayProduct]; }
+    void setIsDirectHadronDecayProduct(bool b) { flags[kIsDirectHadronDecayProduct] = b; }
+
+    /////////////////////////////////////////////////////////////////////////////
+    //these are generator history-dependent functions for tagging particles
+    //associated with the hard process
+    //Currently implemented for Pythia 6 and Pythia 8 status codes and history
+    //and may not have 100% consistent meaning across all types of processes
+    //Users are strongly encouraged to stick to the more robust flags above
+
+    //this particle is part of the hard process
+    bool isHardProcess() const { return flags[kIsHardProcess]; }
+    void setIsHardProcess(bool b) { flags[kIsHardProcess] = b; }
+
+    //this particle is the direct descendant of a hard process particle of the same pdg id
+    bool fromHardProcess() const { return flags[kFromHardProcess]; }
+    void setFromHardProcess(bool b) { flags[kFromHardProcess] = b; }
+
+    //this particle is a direct or indirect decay product of a tau
+    //from the hard process
+    bool isHardProcessTauDecayProduct() const { return flags[kIsHardProcessTauDecayProduct]; }
+    void setIsHardProcessTauDecayProduct(bool b) { flags[kIsHardProcessTauDecayProduct] = b; }
+
+    //this particle is a direct decay product of a tau
+    //from the hard process
+    bool isDirectHardProcessTauDecayProduct() const { return flags[kIsDirectHardProcessTauDecayProduct]; }
+    void setIsDirectHardProcessTauDecayProduct(bool b) { flags[kIsDirectHardProcessTauDecayProduct] = b; }
+
+    //this particle is the direct descendant of a hard process particle of the same pdg id
+    //For outgoing particles the kinematics are those before QCD or QED FSR
+    //This corresponds roughly to status code 3 in pythia 6
+    bool fromHardProcessBeforeFSR() const { return flags[kFromHardProcessBeforeFSR]; }
+    void setFromHardProcessBeforeFSR(bool b) { flags[kFromHardProcessBeforeFSR] = b; }
+
+    //this particle is the first copy of the particle in the chain with the same pdg id
+    bool isFirstCopy() const { return flags[kIsFirstCopy]; }
+    void setIsFirstCopy(bool b) { flags[kIsFirstCopy] = b; }
+
+    //this particle is the last copy of the particle in the chain with the same pdg id
+    //(and therefore is more likely, but not guaranteed, to carry the final physical momentum)
+    bool isLastCopy() const { return flags[kIsLastCopy]; }
+    void setIsLastCopy(bool b) { flags[kIsLastCopy] = b; }
+
+    //this particle is the last copy of the particle in the chain with the same pdg id
+    //before QED or QCD FSR
+    //(and therefore is more likely, but not guaranteed, to carry the momentum after ISR)
+    bool isLastCopyBeforeFSR() const { return flags[kIsLastCopyBeforeFSR]; }
+    void setIsLastCopyBeforeFSR(bool b) { flags[kIsLastCopyBeforeFSR] = b; }
+
+    const std::bitset<15>& getFlags() const { return flags; }
+
+private:
+    std::bitset<15> flags;
+};
\ No newline at end of file
diff --git a/Preprocessing/HLT/GenTools.h b/Preprocessing/HLT/GenTools.h
new file mode 100644
index 00000000000..60dd1cb51cc
--- /dev/null
+++ b/Preprocessing/HLT/GenTools.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include "AnalysisTools.h"
+#include "GenLepton.h"
+
+using Displ3D = reco_tau::gen_truth::Displacement3D;
+using RVecDispl3D = ROOT::VecOps::RVec<Displ3D>;
+using GenLepton = reco_tau::gen_truth::GenLepton;
+using RVecGenLepton = ROOT::VecOps::RVec<GenLepton>;
+
+inline bool IsLepton(TauType tau_type)
+{
+    static const std::set<TauType> lepton_types = {
+        TauType::e, TauType::mu, TauType::tau, TauType::emb_e, TauType::emb_mu, TauType::emb_tau,
+        TauType::displaced_e, TauType::displaced_mu, TauType::displaced_tau
+    };
+    return lepton_types.count(tau_type);
+}
+
+inline RVecLV GetGenP4(const RVecI& tauType, const RVecI& genLepUniqueIdx, const RVecI& genJetUniqueIdx,
+                       const RVecGenLepton& genLeptons, const RVecLV& genJet_p4)
+{
+    RVecLV gen_p4(tauType.size());
+    for(size_t tau_idx = 0; tau_idx < tauType.size(); ++tau_idx) {
+        const auto tau_type = static_cast<TauType>(tauType[tau_idx]);
+        if(IsLepton(tau_type)) {
+            const int genLepton_idx = genLepUniqueIdx[tau_idx];
+            gen_p4[tau_idx] = genLeptons.at(genLepton_idx).visibleP4();
+        } else if(tau_type == TauType::jet) {
+            const int genJet_idx = genJetUniqueIdx[tau_idx];
+            gen_p4[tau_idx] = genJet_p4.at(genJet_idx);
+        }
+    }
+    return gen_p4;
+}
+
+inline RVecI GetGenCharge(const
+                          RVecI& tauType, const RVecI& genLepUniqueIdx, const RVecGenLepton& genLeptons)
+{
+    RVecI gen_charge(tauType.size(), 0);
+    for(size_t tau_idx = 0; tau_idx < tauType.size(); ++tau_idx) {
+        const auto tau_type = static_cast<TauType>(tauType[tau_idx]);
+        if(IsLepton(tau_type)) {
+            const int genLepton_idx = genLepUniqueIdx[tau_idx];
+            gen_charge[tau_idx] = genLeptons.at(genLepton_idx).charge();
+        }
+    }
+    return gen_charge;
+}
+
+inline RVecI GetGenPartonFlavour(const RVecI& tauType, const RVecI& genJetUniqueIdx,
+                                 const ROOT::VecOps::RVec<int>& GenJet_partonFlavour)
+{
+    RVecI gen_pf(tauType.size(), -999);
+    for(size_t tau_idx = 0; tau_idx < tauType.size(); ++tau_idx) {
+        const auto tau_type = static_cast<TauType>(tauType[tau_idx]);
+        if(tau_type == TauType::jet) {
+            const int genJet_idx = genJetUniqueIdx[tau_idx];
+            gen_pf[tau_idx] = GenJet_partonFlavour.at(genJet_idx);
+        }
+    }
+    return gen_pf;
+}
+
+inline RVecDispl3D GetGenFlightLength(const RVecI& tauType, const RVecI& genLepUniqueIdx,
+                                      const RVecGenLepton& genLeptons)
+{
+    RVecDispl3D fl(tauType.size(), Displ3D(0, 0, 0));
+    for(size_t tau_idx = 0; tau_idx < tauType.size(); ++tau_idx) {
+        const auto tau_type = static_cast<TauType>(tauType[tau_idx]);
+        if(IsLepton(tau_type)) {
+            const int genLepton_idx = genLepUniqueIdx[tau_idx];
+            fl[tau_idx] = genLeptons.at(genLepton_idx).flightLength();
+        }
+    }
+    return fl;
+}
+
+inline TauType RefineTauTypeDefinition(TauType tau_type, bool isEmbedded, bool isDisplaced)
+{
+    if(tau_type == TauType::e) {
+        if(isEmbedded) return TauType::emb_e;
+        if(isDisplaced) return TauType::displaced_e;
+    }
+    if(tau_type == TauType::mu) {
+        if(isEmbedded) return TauType::emb_mu;
+        if(isDisplaced) return TauType::displaced_mu;
+    }
+    if(tau_type == TauType::tau) {
+        if(isEmbedded) return TauType::emb_tau;
+        if(isDisplaced) return TauType::displaced_tau;
+    }
+    return tau_type;
+}
+
+inline RVecI GetTauTypes(const RVecGenLepton& genLeptons, const RVecI& genLepUniqueIdx, const RVecSetInt& genLepIndices,
+                         const RVecI& genJetUniqueIdx, bool isEmbedded, bool detectDisplaced,
+                         double minFlightLength_rho = 1., double maxFlightLength_rho = 100.,
+                         double minFlightLength_z = 0., double maxFlightLength_z = 20.)
+{
+    using namespace reco_tau::gen_truth;
+    RVecI tau_types(genLepUniqueIdx.size(), static_cast<int>(TauType::other));
+    for(size_t obj_idx = 0; obj_idx < tau_types.size(); ++obj_idx) {
+        if(genLepIndices[obj_idx].empty()) {
+            if(genJetUniqueIdx[obj_idx] >= 0)
+                tau_types[obj_idx] = static_cast<int>(isEmbedded ? TauType::emb_jet : TauType::jet);
+        } else {
+            if(genLepUniqueIdx[obj_idx] >= 0 && genLepIndices[obj_idx].size() == 1) {
+                const int genLepton_idx = genLepUniqueIdx[obj_idx];
+                const auto& genLepton = genLeptons[genLepton_idx];
+                bool isDisplaced = false;
+                if(detectDisplaced) {
+                    const double flightLength_rho = genLepton.flightLength().rho();
+                    const double flightLength_z = std::abs(genLepton.flightLength().z());
+                    isDisplaced = flightLength_rho >= minFlightLength_rho && flightLength_rho <= maxFlightLength_rho
+                                  && flightLength_z >= minFlightLength_z && flightLength_z <= maxFlightLength_z;
+                }
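+                // The default window assumes gen vertex coordinates in cm: 1 <= rho <= 100 and |z| <= 20.
+                // Leptons outside the window keep their prompt type in RefineTauTypeDefinition() below.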
+                TauType detectedType = TauType::other;
+                if(genLepton.kind() == GenLepton::Kind::TauDecayedToHadrons && genLepton.visibleP4().pt() > 15.)
+                    detectedType = TauType::tau;
+                else if((genLepton.kind() == GenLepton::Kind::PromptElectron
+                        || genLepton.kind() == GenLepton::Kind::TauDecayedToElectron) && genLepton.visibleP4().pt() > 8.)
+                    detectedType = TauType::e;
+                else if((genLepton.kind() == GenLepton::Kind::PromptMuon
+                        || genLepton.kind() == GenLepton::Kind::TauDecayedToMuon) && genLepton.visibleP4().pt() > 8.)
+                    detectedType = TauType::mu;
+                detectedType = RefineTauTypeDefinition(detectedType, isEmbedded, isDisplaced);
+                tau_types[obj_idx] = static_cast<int>(detectedType);
+            }
+        }
+    }
+    return tau_types;
+}
diff --git a/Preprocessing/HLT/L1eff.py b/Preprocessing/HLT/L1eff.py
new file mode 100644
index 00000000000..9249d94e8f0
--- /dev/null
+++ b/Preprocessing/HLT/L1eff.py
@@ -0,0 +1,146 @@
+import os
+import ROOT
+ROOT.gROOT.SetBatch(True)
+
+def ListToVector(list, type="string"):
+    vec = ROOT.std.vector(type)()
+    for item in list:
+        vec.push_back(item)
+    return vec
+
+headers_dir = os.path.dirname(os.path.abspath(__file__))
+for header in [ 'GenLepton.h' ]:
+    header_path = os.path.join(headers_dir, header)
+    if not ROOT.gInterpreter.Declare(f'#include "{header_path}"'):
+        raise RuntimeError(f'Failed to load {header_path}')
+
+
+ROOT.gInterpreter.Declare('''
+using LorentzVectorXYZ = ROOT::Math::LorentzVector<ROOT::Math::PxPyPzE4D<double>>;
+using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double>>;
+using LorentzVectorE = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiE4D<double>>;
+using RVecI = ROOT::VecOps::RVec<int>;
+using RVecS = ROOT::VecOps::RVec<size_t>;
+using RVecUC = ROOT::VecOps::RVec<unsigned char>;
+using RVecF = ROOT::VecOps::RVec<float>;
+using RVecB = ROOT::VecOps::RVec<bool>;
+
+template<typename T1, typename T2>
+T1 DeltaPhi(T1 phi1, T2 phi2){
+    return ROOT::Math::VectorUtil::Phi_mpi_pi(phi2 - phi1);
+}
+template<typename T1, typename T2>
+T1 DeltaEta(T1 eta1, T2 eta2){
+    return (eta2-eta1);
+}
+
+template<typename T1, typename T2>
+T1 DeltaR(T1 eta1, T1 phi1, T2 eta2, T2 phi2){
+    T1 dphi = DeltaPhi(phi1, phi2);
+    T1 deta = DeltaEta(eta1, eta2);
+    return std::hypot(dphi, deta);
+}
+''')
+
+sample_path = "/eos/cms/store/group/phys_tau/kandroso/Run3_HLT/prod_v1/GluGluHToTauTau_M-125"
+files = '*.root'
+#files = 'nano_2.root'
+df = ROOT.RDataFrame("Events", os.path.join(sample_path, files))
+
+df = df.Define("genLeptons", """
+    reco_tau::gen_truth::GenLepton::fromNanoAOD(GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass,
+                                                GenPart_genPartIdxMother, GenPart_pdgId, GenPart_statusFlags)
+""")
+df = df.Define('genTauIdx', '''
+    RVecS indices;
+    for(size_t n = 0; n < genLeptons.size(); ++n) {
+        if(genLeptons[n].kind() == reco_tau::gen_truth::GenLepton::Kind::TauDecayedToHadrons
+                && std::abs(genLeptons[n].visibleP4().eta()) < 2.1)
+            indices.push_back(n);
+    }
+    return indices;
+''')
+
+df = df.Define('genTau_l1Match', '''
+    RVecI result(genLeptons.size(), -1);
+    for (size_t idx : genTauIdx) {
+        int matched = -1;
+        for(size_t n = 0; n < L1Tau_pt.size(); ++n) {
+            const auto dR = DeltaR(genLeptons[idx].visibleP4().eta(), genLeptons[idx].visibleP4().phi(),
+                                   L1Tau_eta[n], L1Tau_phi[n]);
+            if(dR < 0.4) {
+                matched = n;
+                break;
+            }
+        }
+        result[idx] = matched;
+    }
+    return result;
+''')
+
+for var in [ 'pt', 'eta', 'phi', 'mass' ]:
+    df = df.Define(f'genTau_{var}', f'''
+        RVecF result;
+        for(size_t idx : genTauIdx)
+            result.push_back(genLeptons[idx].visibleP4().{var}());
+        return result;
+    ''')
+    df = df.Define(f'genTau_{var}_matched', f'''
+        RVecF result;
+        for(size_t idx : genTauIdx) {{
+            if (genTau_l1Match[idx] >= 0)
+                result.push_back(genLeptons[idx].visibleP4().{var}());
+        }}
+        return result;
+    ''')
+    df = df.Define(f'genTau_{var}_matchedIso', f'''
+        RVecF result;
+        for(size_t idx : genTauIdx) {{
+            if (genTau_l1Match[idx] >= 0 && L1Tau_hwIso[genTau_l1Match[idx]] > 0)
+                result.push_back(genLeptons[idx].visibleP4().{var}());
+        }}
+        return result;
+    ''')
+
+df = df.Define('nGenLeptons', 'genLeptons.size()')
+
+pt_bins = [
+    10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 300, 500,
+]
+pt_bins = ListToVector(pt_bins, "double")
+pt_model = ROOT.RDF.TH1DModel('', '', pt_bins.size() - 1, pt_bins.data())
+
+hist = df.Histo1D(pt_model, 'genTau_pt')
+hist_matched = df.Histo1D(pt_model, 'genTau_pt_matched')
+hist_matched_iso = df.Histo1D(pt_model, 'genTau_pt_matchedIso')
+
+eff = ROOT.TEfficiency(hist_matched.GetValue(), hist.GetValue())
+eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
+
+eff_iso = ROOT.TEfficiency(hist_matched_iso.GetValue(), hist.GetValue())
+eff_iso.SetStatisticOption(ROOT.TEfficiency.kFCP)
+
+ROOT.gStyle.SetOptStat(0)
+canvas = ROOT.TCanvas("canvas", "canvas", 800, 800)
+canvas.Draw()
+canvas.SetLogx()
+hist_base = pt_model.GetHistogram()
+hist_base.Draw()
+hist_base.GetXaxis().SetTitle("visible gen #tau p_{T} (GeV)")
+hist_base.GetXaxis().SetNoExponent(1)
+hist_base.GetXaxis().SetMoreLogLabels(1)
+hist_base.GetXaxis().SetTitleOffset(1.2)
+hist_base.GetYaxis().SetTitle("efficiency")
+hist_base.SetTitle("L1 reconstruction efficiency for #tau_{h}")
+eff.Draw("SAME P")
+eff_iso.SetLineColor(ROOT.kRed)
+eff_iso.Draw("SAME P")
+canvas.Update()
+legend = ROOT.TLegend(0.6, 0.2, 0.9, 0.4)
+legend.AddEntry(eff, "L1 #tau", "F")
+legend.AddEntry(eff_iso, "iso L1 #tau", "F")
+legend.Draw()
+
+canvas.SaveAs("genTau_pt_eff.pdf")
+
diff --git a/Preprocessing/HLT/MixBin.py b/Preprocessing/HLT/MixBin.py
new file mode 100644
index 00000000000..2d1e31753e2
--- /dev/null
+++ b/Preprocessing/HLT/MixBin.py
@@ -0,0 +1,49 @@
+class MixBin:
+    @staticmethod
+    def Load(cfg):
+        mix_bins = []
+        pt_bin_edges = cfg['bin_edges']['pt']
+        eta_bin_edges = cfg['bin_edges']['eta']
+        batch_size = 0
+        for step_idx, output_step in enumerate(cfg['output']):
+            step_bins = []
+            for bin_idx, bin in enumerate(output_step['bins']):
+                if len(bin['counts']) != len(pt_bin_edges) - 1:
+                    raise ValueError("Number of counts does not match number of pt bins")
+                for pt_bin_idx, count in enumerate(bin['counts']):
+                    if count == 0: continue
+                    mix_bin = MixBin()
+                    mix_bin.input_setups = output_step['input_setups']
+                    mix_bin.inputs = []
+                    for input_setup in output_step['input_setups']:
+                        mix_bin.inputs.extend(cfg['inputs'][input_setup])
+                    selection = cfg['bin_selection'].format(pt_low=pt_bin_edges[pt_bin_idx],
+                                                            pt_high=pt_bin_edges[pt_bin_idx + 1],
+                                                            eta_low=eta_bin_edges[bin['eta_bin']],
+                                                            eta_high=eta_bin_edges[bin['eta_bin'] + 1])
+                    mix_bin.selection = f'L1Tau_Gen_type == static_cast<int>(TauType::{bin["tau_type"]}) && ({selection})'
+                    mix_bin.tau_type = bin['tau_type']
+                    mix_bin.eta_bin = bin['eta_bin']
+                    mix_bin.pt_bin = pt_bin_idx
+                    mix_bin.step_idx = step_idx
+                    mix_bin.bin_idx = bin_idx
+                    mix_bin.start_idx = batch_size
+                    mix_bin.stop_idx = batch_size + count
+                    mix_bin.count = count
+                    mix_bin.allow_duplicates = bin.get('allow_duplicates', False)
+                    step_bins.append(mix_bin)
+                    batch_size += count
+            mix_bins.append(step_bins)
+        return mix_bins, batch_size
+
+    def Print(self):
+        print(f'taus/batch: {self.count}')
+        print(f'inputs: {self.input_setups}')
+        print(f'eta bin: {self.eta_bin}')
+        print(f'pt bin: {self.pt_bin}')
+        print(f'step idx: {self.step_idx}')
+        print(f'bin idx: {self.bin_idx}')
+        print(f'tau_type: {self.tau_type}')
+        print(f'selection: {self.selection}')
+        print(f'allow duplicates: {self.allow_duplicates}')
+
diff --git a/Preprocessing/HLT/TupleMaker.h b/Preprocessing/HLT/TupleMaker.h
new file mode 100644
index 00000000000..9e12a86c9d4
--- /dev/null
+++ b/Preprocessing/HLT/TupleMaker.h
@@ -0,0 +1,206 @@
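+/*! Definition of TupleMaker.
+A producer thread fills one entry queue per output bin from the input dataframe, while the output dataframe
+pops entries from these queues to assemble mixed batches (see process() below).
+*/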
diff --git a/Preprocessing/HLT/TupleMaker.h b/Preprocessing/HLT/TupleMaker.h
new file mode 100644
index 00000000000..9e12a86c9d4
--- /dev/null
+++ b/Preprocessing/HLT/TupleMaker.h
@@ -0,0 +1,206 @@
+#pragma once
+
+#include <chrono>
+#include <ctime>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include <variant>
+#include <vector>
+
+#include "EntryQueue.h"
+
+namespace analysis {
+
+struct Entry {
+    bool valid{false};
+    using Variant = std::variant<int, float, RVecI, RVecF>;
+    std::vector<Variant> values;
+
+    explicit Entry(size_t size) : values(size) {}
+
+    template<typename T>
+    void Set(int index, const T& value)
+    {
+        if(static_cast<size_t>(index) >= values.size())
+            throw std::runtime_error("Entry::Set: index out of range");
+        values[index] = value;
+    }
+};
+
+struct StopLoop {};
+
+struct BinDesc {
+    size_t slot_id;
+    int start_idx, stop_idx;
+
+    bool IsInside(ULong64_t rdfentry, int batch_size) const
+    {
+        const int index = rdfentry % batch_size;
+        return index >= start_idx && index < stop_idx;
+    }
+};
+
+namespace detail {
+inline void putEntry(std::vector<std::shared_ptr<Entry>>& entries, int index) {}
+
+template<typename T, typename ...Args>
+void putEntry(std::vector<std::shared_ptr<Entry>>& entries, int var_index, const ROOT::VecOps::RVec<T>& value,
+              Args&& ...args)
+{
+    if(value.size() != entries.size())
+        throw std::runtime_error("TupleMaker::putEntry: vector size mismatch");
+    for(size_t entry_index = 0; entry_index < entries.size(); ++entry_index)
+        entries[entry_index]->Set(var_index, value[entry_index]);
+    putEntry(entries, var_index + 1, std::forward<Args>(args)...);
+}
+
+inline std::string timeStamp(const std::chrono::system_clock::time_point& time_point)
+{
+    std::ostringstream ss;
+    const std::time_t time_t_point = std::chrono::system_clock::to_time_t(time_point);
+    char mbstr[100];
+    if(std::strftime(mbstr, sizeof(mbstr), "%F %T", std::localtime(&time_t_point)))
+        ss << "[" << mbstr << "] ";
+    return ss.str();
+}
+
+inline std::string timeStampNow()
+{
+    return timeStamp(std::chrono::system_clock::now());
+}
+
+inline int getDuration(const std::chrono::system_clock::time_point& start,
+                       const std::chrono::system_clock::time_point& end)
+{
+    const auto delta_t = end - start;
+    const auto duration = std::chrono::duration_cast<std::chrono::seconds>(delta_t).count();
+    return static_cast<int>(duration);
+}
+
+} // namespace detail
+
+template<typename ...Args>
+struct TupleMaker {
+    static constexpr size_t nArgs = sizeof...(Args);
+    using Queue = MultiSlotEntryQueue<std::shared_ptr<Entry>>;
+    using PushResult = typename Queue::PushResult;
+
+    TupleMaker() {}
+    TupleMaker(const TupleMaker&) = delete;
+    TupleMaker& operator=(const TupleMaker&) = delete;
+
+    int AddBin(int start_idx, int stop_idx, size_t max_queue_size, size_t max_entries)
+    {
+        const size_t slot_id = queue.AddQueue(max_queue_size, max_entries);
+        bins.emplace_back(BinDesc{ slot_id, start_idx, stop_idx });
+        return static_cast<int>(slot_id);
+    }
+
+    bool GetSlotId(ULong64_t rdfentry, int batch_size, size_t& slot_id) const
+    {
+        for(const auto& bin : bins) {
+            if(bin.IsInside(rdfentry, batch_size)) {
+                slot_id = bin.slot_id;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    ROOT::RDF::RNode process(ROOT::RDF::RNode df_in, ROOT::RDF::RNode df_out,
+                             const std::vector<std::string>& var_names, int batch_size, bool infinite_input)
+    {
+        thread = std::make_unique<std::thread>([=]() {
+            std::cout << "TupleMaker::process: foreach started." << std::endl;
+            ROOT::RDF::RNode df = df_in;
+            bool do_next_iteration = true;
+            for(size_t iteration_number = 1; do_next_iteration; ++iteration_number) {
+                try {
+                    std::cout << detail::timeStampNow() << "TupleMaker::process: foreach iteration "
+                              << iteration_number << std::endl;
+                    df.Foreach([&](const RVecI& slot_sel, const Args& ...args) {
+                        std::vector<std::shared_ptr<Entry>> entries(slot_sel.size());
+                        for(size_t entry_index = 0; entry_index < entries.size(); ++entry_index)
+                            entries[entry_index] = std::make_shared<Entry>(nArgs);
+                        detail::putEntry(entries, 0, args...);
+                        for(size_t entry_index = 0; entry_index < slot_sel.size(); ++entry_index) {
+                            if(slot_sel[entry_index] < 0) continue;
+                            const size_t slot_id = slot_sel[entry_index];
+                            auto& entry = entries[entry_index];
+                            entry->valid = true;
+                            const auto push_result = queue.Push(slot_id, entry);
+                            if(push_result == PushResult::EntryPushedToEmptyQueue) {
+                                const auto queue_state = queue.GetQueueState(slot_id);
+                                if(queue_state.n_current_max_size_reached > 0) {
+                                    const size_t new_max_size = queue_state.max_size * 10;
+                                    std::cout << detail::timeStampNow() << "slot_id " << slot_id
+                                              << " updating max_size from " << queue_state.max_size
+                                              << " to " << new_max_size
+                                              << ". n_current_max_size_reached=" << queue_state.n_current_max_size_reached
+                                              << ", n_max_size_reached_total=" << queue_state.n_max_size_reached_total
+                                              << ", n_zero_size_reached=" << queue_state.n_zero_size_reached << std::endl;
+                                    queue.SetMaxSize(slot_id, new_max_size);
+                                }
+                            }
+                            if(queue.IsMaxEntriesReachedForAll())
+                                throw StopLoop();
+                        }
+                    }, var_names);
+                } catch(StopLoop) {
+                    std::cout << detail::timeStampNow() << "TupleMaker::process: queue output is depleted." << std::endl;
+                    do_next_iteration = false;
+                }
+                if(!infinite_input)
+                    do_next_iteration = false;
+                std::set<size_t> empty_bins;
+                for(size_t bin_idx = 0; bin_idx < bins.size(); ++bin_idx) {
+                    const auto& desc = bins[bin_idx];
+                    const auto queue_state = queue.GetQueueState(desc.slot_id);
+                    if(queue_state.n_entries == 0)
+                        empty_bins.insert(bin_idx);
+                    std::cout << "bin " << bin_idx << ": " << queue_state << std::endl;
+                }
+                if(!empty_bins.empty()) {
+                    std::cout << "TupleMaker::process: empty bins: ";
+                    for(const auto& bin_idx : empty_bins)
+                        std::cout << bin_idx << " ";
+                    std::cout << std::endl;
+                    throw std::runtime_error("TupleMaker::process: empty bins after processing the full dataset");
+                }
+            }
+            queue.SetInputDepleted();
+            std::cout << "TupleMaker::process: foreach done." << std::endl;
+        });
+        df_out = df_out.Define("_entry", [=](ULong64_t rdfentry) {
+            std::shared_ptr<Entry> entry;
+            size_t slot_id;
+            if(GetSlotId(rdfentry, batch_size, slot_id)) {
+                const auto pop_start = std::chrono::system_clock::now();
+                queue.Pop(slot_id, entry);
+                const auto duration = detail::getDuration(pop_start, std::chrono::system_clock::now());
+                if(duration > 60)
+                    std::cout << detail::timeStampNow() << "Pop took " << duration
+                              << " seconds to get the next entry for slot " << slot_id << std::endl;
+            }
+            const size_t batch_idx = rdfentry / batch_size;
+            if((rdfentry % batch_size == 0) && (batch_idx % 100 == 0))
+                std::cout << detail::timeStampNow() << "Processed " << batch_idx << " batches" << std::endl;
+            return entry;
+        }, { "rdfentry_" });
+        return df_out;
+    }
+
+    void join()
+    {
+        if(thread)
+            thread->join();
+    }
+
+    Queue queue;
+    std::vector<BinDesc> bins;
+    std::unique_ptr<std::thread> thread;
+};
+
+} // namespace analysis
\ No newline at end of file
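The scheduling in `TupleMaker` hinges on `BinDesc::IsInside`: every output entry index is reduced modulo the batch size, and each bin owns a fixed slot range within a batch. A runnable Python restatement of that arithmetic, to make the layout concrete:

```python
# Mirror of BinDesc::IsInside from TupleMaker.h above.
def is_inside(rdfentry, batch_size, start_idx, stop_idx):
    index = rdfentry % batch_size
    return start_idx <= index < stop_idx

# With batch_size = 10 and a bin owning slots [3, 6), entries 3,4,5, 13,14,15, ...
# of the output frame are filled from that bin's queue:
assert [e for e in range(20) if is_inside(e, 10, 3, 6)] == [3, 4, 5, 13, 14, 15]
```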
diff --git a/Preprocessing/HLT/dumpBranches.py b/Preprocessing/HLT/dumpBranches.py
new file mode 100644
index 00000000000..770d59fbec3
--- /dev/null
+++ b/Preprocessing/HLT/dumpBranches.py
@@ -0,0 +1,17 @@
+import ROOT
+ROOT.gROOT.SetBatch(True)
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Dump branches.')
+    parser.add_argument('--tree', required=False, default='Events', type=str, help="name of the TTree")
+    parser.add_argument('input', nargs=1, type=str, help="input file")
+    args = parser.parse_args()
+
+    df = ROOT.RDataFrame(args.tree, args.input[0])
+    columns = [ str(c) for c in df.GetColumnNames() ]
+    columns = sorted(columns)
+    max_len = max([ len(c) for c in columns ])
+    for c in columns:
+        c_type = str(df.GetColumnType(c))
+        print(f'{c:<{max_len+4}}{c_type}')
diff --git a/Preprocessing/HLT/dumpHist.py b/Preprocessing/HLT/dumpHist.py
new file mode 100644
index 00000000000..fb2c1bc40c9
--- /dev/null
+++ b/Preprocessing/HLT/dumpHist.py
@@ -0,0 +1,71 @@
+import os
+import sys
+import numpy as np
+import json
+
+if __name__ == "__main__":
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    base_dir = os.path.dirname(file_dir)
+    if base_dir not in sys.path:
+        sys.path.append(base_dir)
+    __package__ = os.path.split(file_dir)[-1]
+
+from .AnalysisTools import *
+
+import ROOT
+ROOT.gROOT.SetBatch(True)
+ROOT.EnableThreadSafety()
+
+
+def GetBinContent(hist, pt, eta):
+    x_axis = hist.GetXaxis()
+    x_bin = x_axis.FindFixBin(pt)
+    y_axis = hist.GetYaxis()
+    y_bin = y_axis.FindFixBin(eta)
+    return hist.GetBinContent(x_bin, y_bin)
+
+def GetBinEdges(axis):
+    edges = []
+    for bin in range(1, axis.GetNbins() + 2):
+        edges.append(axis.GetBinLowEdge(bin))
+    return np.array(edges)
+
+def dump_hist(hist_name, input_files):
+    result = None
+    for file_name in input_files:
+        file = ROOT.TFile.Open(file_name, "READ")
+        hist = file.Get(hist_name)
+        hist.SetDirectory(ROOT.nullptr)  # detach from the file so it survives Close()
+        file.Close()
+        if result:
+            result.Add(hist)
+        else:
+            result = hist
+    x_bins = GetBinEdges(result.GetXaxis())
+    y_bins = GetBinEdges(result.GetYaxis())
+    content = np.zeros((len(x_bins) - 1, len(y_bins) - 1))
+    for x_bin in range(1, result.GetNbinsX() + 1):
+        for y_bin in range(1, result.GetNbinsY() + 1):
+            content[x_bin-1, y_bin-1] = result.GetBinContent(x_bin, y_bin)
+
+    bin_dict = []
+    for x_bin in range(len(x_bins) - 1):
+        for y_bin in range(len(y_bins) - 1):
+            bin_dict.append({
+                'pt': (x_bins[x_bin], x_bins[x_bin + 1]),
+                'eta': (y_bins[y_bin], y_bins[y_bin + 1]),
+                'count': content[x_bin, y_bin],
+            })
+    print(f'x_bins = {x_bins.tolist()}')
+    print(f'y_bins = {y_bins.tolist()}')
+    #print(json.dumps(bin_dict, indent=2))
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Dump histogram.')
+    parser.add_argument('--hist', required=True, type=str, help="name of the histogram")
+    parser.add_argument('input', nargs='+', type=str, help="input files")
+    args = parser.parse_args()
+
+    dump_hist(args.hist, args.input)
diff --git a/Preprocessing/HLT/makeMix.py b/Preprocessing/HLT/makeMix.py
new file mode 100644
index 00000000000..d32004f3b2b
--- /dev/null
+++ b/Preprocessing/HLT/makeMix.py
@@ -0,0 +1,289 @@
+import math
+import os
+import shutil
+import sys
+import yaml
+import gc
+
+if __name__ == "__main__":
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    base_dir = os.path.dirname(file_dir)
+    if base_dir not in sys.path:
+        sys.path.append(base_dir)
+    __package__ = os.path.split(file_dir)[-1]
+
+from .AnalysisTools import *
+from .MixBin import MixBin
+
+import ROOT
+ROOT.gROOT.SetBatch(True)
+ROOT.EnableThreadSafety()
+
+def make_split_ranges(n_elem, n_splits):
+    split_size = n_elem / n_splits
+    splits = []
+    for i in range(n_splits + 1):
+        x = math.floor(i * split_size) if i < n_splits else n_elem
+        if x in splits:
+            raise RuntimeError(f'Unable to split {n_elem} into {n_splits} ranges')
+        splits.append(x)
+    return splits
+
+def make_input_list(inputs, input_root):
+    input_lists = []
+    all_files = []
+    n_files_max = 0
+    for input in inputs:
+        input_path = os.path.join(input_root, input)
+        file_list = MakeFileList(input_path)
+        n_files_max = max(n_files_max, len(file_list))
+        input_lists.append(file_list)
+        all_files.extend(file_list)
+    steps = [ len(input_lists[i]) / n_files_max for i in range(len(input_lists)) ]
+    input_indices = []
+    for i in range(n_files_max):
+        for input_idx in range(len(inputs)):
+            pos = int(i * steps[input_idx])
+            entry = (input_idx, pos)
+            if entry not in input_indices:
+                input_indices.append(entry)
+    input_files = []
+    for input_idx, pos in input_indices:
+        input_files.append(input_lists[input_idx][pos])
+    for file in all_files:
+        if file not in input_files:
+            raise RuntimeError(f'make_input_list logic error: {file} not in input_files')
+    return input_files
+
+def make_mix(cfg_file, output, n_jobs, job_id):
+    with open(cfg_file, 'r') as f:
+        cfg = yaml.safe_load(f)
+    batch_ranges = make_split_ranges(cfg['n_batches'], n_jobs)
+    n_batches = batch_ranges[job_id + 1] - batch_ranges[job_id]
+    print(f'Job {job_id+1}/{n_jobs}: creating {n_batches} batches...')
+    if os.path.exists(output):
+        shutil.rmtree(output)
+    os.makedirs(output, exist_ok=True)
+    mix_steps, batch_size = MixBin.Load(cfg)
+    print(f'Number of mix steps: {len(mix_steps)}')
+    print(f'Batch size: {batch_size}')
+    for step_idx, mix_bins in enumerate(mix_steps):
+        print(f'{timestamp_str()}{step_idx+1}/{len(mix_steps)}: processing...')
+        print(f'Number of bins in step: {len(mix_bins)}')
+        gc.collect()
+        input_files = make_input_list(mix_bins[0].inputs, cfg['input_root'])
+        print(f'Input files ({len(input_files)} total):')
+        for file in input_files:
+            print(f'  {file}')
+        input_files_vec = ListToVector(input_files, "string")
+        df_in = ROOT.RDataFrame(cfg['tree_name'], input_files_vec)
+        if n_jobs != 1:
+            event_ranges = make_split_ranges(df_in.Count().GetValue(), n_jobs)
+            df_in = df_in.Range(event_ranges[job_id], event_ranges[job_id + 1])
+            print(f'Total number of available input entries {event_ranges[-1]}')
+            print(f'Considering events in range [{event_ranges[job_id]}, {event_ranges[job_id+1]})')
+        if 'event_sel' in cfg:
+            df_in = df_in.Filter(cfg['event_sel'])
+        df_in = ApplyCommonDefinitions(df_in)
+
+        l1tau_columns = [
+            'pt', 'eta', 'phi', 'hwEtSum', 'hwEta', 'hwIso', 'hwPhi', 'hwPt', 'isoEt', 'nTT', 'rawEt', 'towerIEta',
+            'towerIPhi',
+        ]
+
+        df_in = df_in.Define('L1Tau_nPV', 'RVecI(L1Tau_pt.size(), nPFPrimaryVertex)')
+        df_in = df_in.Define('L1Tau_event', 'RVecI(L1Tau_pt.size(), static_cast<int>(event))')
+        df_in = df_in.Define('L1Tau_luminosityBlock', 'RVecI(L1Tau_pt.size(), static_cast<int>(luminosityBlock))')
+        df_in = df_in.Define('L1Tau_run', 'RVecI(L1Tau_pt.size(), static_cast<int>(run))')
+        other_columns = { 'nPV', 'event', 'luminosityBlock', 'run',
+                          'Gen_type', 'Gen_pt', 'Gen_eta', 'Gen_phi', 'Gen_mass', 'Gen_charge', 'Gen_partonFlavour',
+                          'Gen_flightLength_rho', 'Gen_flightLength_phi', 'Gen_flightLength_z' }
+
+        tau_columns = [ 'Tau_pt', 'Tau_eta', 'Tau_phi', 'Tau_mass', 'Tau_charge', 'Tau_deepTauVSjet' ]
+        for c in tau_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'GetVar(L1Tau_tauIdx, {c})')
+            other_columns.add(c)
+
+        jet_columns = [ 'Jet_pt', 'Jet_eta', 'Jet_phi', 'Jet_mass', 'Jet_PNet_probb', 'Jet_PNet_probc', 'Jet_PNet_probg',
+                        'Jet_PNet_probtauhm', 'Jet_PNet_probtauhp', 'Jet_PNet_probuds', 'Jet_PNet_ptcorr', ]
+        for c in jet_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'GetVar(L1Tau_jetIdx, {c})')
+            other_columns.add(c)
+
+        pfcand_columns = [ 'PFCand_EcalEnergy', 'PFCand_HcalEnergy', 'PFCand_charge', 'PFCand_eta', 'PFCand_longLived',
+                           'PFCand_mass', 'PFCand_pdgId', 'PFCand_phi', 'PFCand_pt', 'PFCand_rawEcalEnergy',
+                           'PFCand_rawHcalEnergy', 'PFCand_trackChi2', 'PFCand_trackDxy', 'PFCand_trackDxyError',
+                           'PFCand_trackDz', 'PFCand_trackDzError', 'PFCand_trackEta', 'PFCand_trackEtaError',
+                           'PFCand_trackHitsValidFraction', 'PFCand_trackIsValid', 'PFCand_trackNdof',
+                           'PFCand_trackNumberOfLostHits', 'PFCand_trackNumberOfValidHits', 'PFCand_trackPhi',
+                           'PFCand_trackPhiError', 'PFCand_trackPt', 'PFCand_trackPtError', 'PFCand_vx', 'PFCand_vy',
+                           'PFCand_vz', ]
+        df_in = df_in.Define('PFCand_p4', 'GetP4(PFCand_pt, PFCand_eta, PFCand_phi, PFCand_mass)')
+        df_in = df_in.Define('L1Tau_PFCand_matched', 'FindMatchingSet(L1Tau_p4, PFCand_p4, 0.5)')
+        for c in pfcand_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'GetVarVec(L1Tau_PFCand_matched, {c})')
+            other_columns.add(c)
+
+        pfpv_columns = [ 'PFPrimaryVertex_chi2', 'PFPrimaryVertex_ndof', 'PFPrimaryVertex_x', 'PFPrimaryVertex_y',
+                         'PFPrimaryVertex_z', ]
+        for c in pfpv_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'ROOT::VecOps::RVec(L1Tau_pt.size(), {c})')
+            other_columns.add(c)
+
+        pfsv_columns = [ 'PFSecondaryVertex_chi2', 'PFSecondaryVertex_eta', 'PFSecondaryVertex_mass',
+                         'PFSecondaryVertex_ndof', 'PFSecondaryVertex_ntracks', 'PFSecondaryVertex_phi',
+                         'PFSecondaryVertex_pt', 'PFSecondaryVertex_x', 'PFSecondaryVertex_y', 'PFSecondaryVertex_z', ]
+        df_in = df_in.Define('PFSecondaryVertex_p4', '''GetP4(PFSecondaryVertex_pt, PFSecondaryVertex_eta,
+                             PFSecondaryVertex_phi, PFSecondaryVertex_mass)''')
+        df_in = df_in.Define('L1Tau_PFSecondaryVertex_matched', 'FindMatchingSet(L1Tau_p4, PFSecondaryVertex_p4, 0.5)')
+        for c in pfsv_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'GetVarVec(L1Tau_PFSecondaryVertex_matched, {c})')
+            other_columns.add(c)
+
+        pixelTrack_columns = [ 'PixelTrack_charge', 'PixelTrack_chi2', 'PixelTrack_dxy', 'PixelTrack_dz', 'PixelTrack_eta',
+                               'PixelTrack_nHits', 'PixelTrack_nLayers', 'PixelTrack_phi', 'PixelTrack_pt',
+                               'PixelTrack_quality', 'PixelTrack_tip', 'PixelTrack_vtxIdx', 'PixelTrack_zip', ]
+        df_in = df_in.Define('PixelTrack_mass', 'RVecF(PixelTrack_pt.size(), 0.f)')
+        df_in = df_in.Define('PixelTrack_p4', 'GetP4(PixelTrack_pt, PixelTrack_eta, PixelTrack_phi, PixelTrack_mass)')
+        df_in = df_in.Define('L1Tau_PixelTrack_matched', 'FindMatchingSet(L1Tau_p4, PixelTrack_p4, 0.5)')
+        for c in pixelTrack_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'GetVarVec(L1Tau_PixelTrack_matched, {c})')
+            other_columns.add(c)
+
+        pixelVertex_columns = [ 'PixelVertex_chi2', 'PixelVertex_ndof', 'PixelVertex_ptv2', 'PixelVertex_weight',
+                                'PixelVertex_z', ]
+        for c in pixelVertex_columns:
+            df_in = df_in.Define(f'L1Tau_{c}', f'ROOT::VecOps::RVec(L1Tau_pt.size(), {c})')
+            other_columns.add(c)
+
+        recHit_columns = {
+            'EB': [ 'RecHitEB_chi2', 'RecHitEB_energy', 'RecHitEB_eta', 'RecHitEB_isTimeErrorValid',
+                    'RecHitEB_isTimeValid', 'RecHitEB_phi', 'RecHitEB_rho', 'RecHitEB_time',
+                    'RecHitEB_timeError', ],
+            'EE': [ 'RecHitEE_chi2', 'RecHitEE_energy', 'RecHitEE_eta', 'RecHitEE_isTimeErrorValid',
+                    'RecHitEE_isTimeValid', 'RecHitEE_phi', 'RecHitEE_rho', 'RecHitEE_time',
+                    'RecHitEE_timeError', ],
+            'HBHE': [ 'RecHitHBHE_chi2', 'RecHitHBHE_eaux', 'RecHitHBHE_energy', 'RecHitHBHE_eraw',
+                      'RecHitHBHE_eta', 'RecHitHBHE_eta_front', 'RecHitHBHE_phi', 'RecHitHBHE_phi_front',
+                      'RecHitHBHE_rho', 'RecHitHBHE_rho_front', 'RecHitHBHE_time', 'RecHitHBHE_timeFalling', ],
+            'HO': [ 'RecHitHO_energy', 'RecHitHO_eta', 'RecHitHO_phi', 'RecHitHO_rho', 'RecHitHO_time', ],
+        }
+        for det, columns in recHit_columns.items():
+            df_in = df_in.Define(f'RecHit{det}_mass', f'RVecF(RecHit{det}_rho.size(), 0.f)')
+            df_in = df_in.Define(f'RecHit{det}_p4', f'''GetP4(RecHit{det}_rho, RecHit{det}_eta,
+                                 RecHit{det}_phi, RecHit{det}_mass)''')
+            df_in = df_in.Define(f'L1Tau_RecHit{det}_matched', f'FindMatchingSet(L1Tau_p4, RecHit{det}_p4, 0.5)')
+            for c in columns:
+                df_in = df_in.Define(f'L1Tau_{c}', f'GetVarVec(L1Tau_RecHit{det}_matched, {c})')
+                other_columns.add(c)
+
+        l1tau_columns.extend(sorted(other_columns))
+
+        columns_in = []
+        columns_out = []
+        for col in l1tau_columns:
+            columns_in.append(f'L1Tau_{col}')
+            out_name = f'L1Tau_{col}' if col not in other_columns else col
+            columns_out.append(out_name)
+
+        column_types = [ str(df_in.GetColumnType(c)) for c in columns_in ]
+
+        tuple_maker = ROOT.analysis.TupleMaker(*column_types)()
+
+        print(f'inputs: {mix_bins[0].input_setups}')
+        nTau_in = 0
+        slot_sel_str = 'RVecI slot_sel(L1Tau_pt.size(), -1);'
+        for bin in mix_bins:
+            nTau_batch = bin.stop_idx - bin.start_idx
+            nTau_bin = nTau_batch * n_batches
+            max_queue_size = nTau_bin if bin.allow_duplicates else min(10000, nTau_bin)
+            slot_id = tuple_maker.AddBin(bin.start_idx, bin.stop_idx, max_queue_size, nTau_bin)
+            nTau_in += nTau_bin
+            slot_sel_str += f'''
+            {{
+                auto sel = {bin.selection};
+                for(size_t n = 0; n < L1Tau_pt.size(); ++n) {{
+                    if(sel[n]) slot_sel[n] = {slot_id};
+                }}
+            }}
+            '''
+            print(f'bin: slot={slot_id} nTaus={nTau_bin} selection="{bin.selection}"')
+        slot_sel_str += ' return slot_sel;'
+        print(f'nTaus total = {nTau_in}')
+        df_in = df_in.Define('slot_sel', slot_sel_str)
+        df_in = df_in.Filter('for(auto slot : slot_sel) if(slot >= 0) return true; return false;')
+
+        columns_in = [ 'slot_sel' ] + columns_in
+        column_types = [ str(df_in.GetColumnType('slot_sel')) ] + column_types
+
+        if step_idx == 0:
+            nTau_out = batch_size * n_batches
+            df_out = ROOT.RDataFrame(nTau_out)
+        else:
+            output_prev_step = os.path.join(output, f'step_{step_idx}.root')
+            df_out = ROOT.RDataFrame(cfg['tree_name'], output_prev_step)
+
+        columns_in_v = ListToVector(columns_in)
+        df_out = tuple_maker.process(ROOT.RDF.AsRNode(df_in), ROOT.RDF.AsRNode(df_out),
+                                     columns_in_v, batch_size, True)
+
+        define_fn_name = 'Define' if step_idx == 0 else 'Redefine'
+        for column_idx in range(1, len(columns_in)):
+            column_type = column_types[column_idx]
+            if column_type in [ 'ROOT::VecOps::RVec<int>', 'ROOT::VecOps::RVec<Int_t>' ]:
+                default_value = '0'
+                entry_type = 'int'
+            elif column_type in [ 'ROOT::VecOps::RVec<float>', 'ROOT::VecOps::RVec<Float_t>' ]:
+                default_value = '0.f'
+                entry_type = 'float'
+            elif column_type in [ 'ROOT::VecOps::RVec<ROOT::VecOps::RVec<int> >',
+                                  'ROOT::VecOps::RVec<ROOT::VecOps::RVec<Int_t> >' ]:
+                default_value = 'RVecI()'
+                entry_type = 'RVecI'
+            elif column_type == 'ROOT::VecOps::RVec<ROOT::VecOps::RVec<float> >':
+                default_value = 'RVecF()'
+                entry_type = 'RVecF'
+            else:
+                raise Exception(f'Unknown column type {column_type}')
+
+            if step_idx != 0:
+                default_value = f'{columns_out[column_idx-1]}'
+            define_str = f'_entry && _entry->valid ? std::get<{entry_type}>(_entry->values.at({column_idx - 1})) : {default_value}'
+            df_out = getattr(df_out, define_fn_name)(columns_out[column_idx - 1], define_str)
+
+        if step_idx == 0:
+            df_out = df_out.Define('is_valid', '_entry && _entry->valid')
+            df_out = df_out.Define('step_idx', f'int({step_idx + 1})')
+        else:
+            df_out = df_out.Redefine('is_valid', '(_entry && _entry->valid) || is_valid')
+            df_out = df_out.Redefine('step_idx', f'(_entry && _entry->valid) ? int({step_idx + 1}) : step_idx')
+        columns_out.extend(['is_valid', 'step_idx'])
+
+        opt = ROOT.RDF.RSnapshotOptions()
+        if step_idx + 1 == len(mix_steps):
+            opt.fCompressionAlgorithm = ROOT.ROOT.kLZMA
+            opt.fCompressionLevel = 9
+        else:
+            opt.fCompressionAlgorithm = ROOT.ROOT.kLZ4
+            opt.fCompressionLevel = 4
+        opt.fMode = 'RECREATE'
+        output_step = os.path.join(output, f'step_{step_idx+1}.root')
+        columns_out_v = ListToVector(columns_out)
+        df_out.Snapshot(cfg['tree_name'], output_step, columns_out_v, opt)
+        tuple_maker.join()
+        print(f'{timestamp_str()}{step_idx+1}/{len(mix_steps)}: done.')
+        if step_idx > 0:
+            print('Removing previous step...')
+            prev_step = os.path.join(output, f'step_{step_idx}.root')
+            os.remove(prev_step)
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Make mix.')
+    parser.add_argument('--cfg', required=True, type=str, help="Mix config file")
+    parser.add_argument('--output', required=True, type=str, help="output directory")
+    parser.add_argument('--n-jobs', required=False, type=int, default=1, help="number of parallel jobs")
+    parser.add_argument('--job-id', required=False, type=int, default=0, help="current job id")
+    args = parser.parse_args()
+
+    PrepareRootEnv()
+    make_mix(args.cfg, args.output, args.n_jobs, args.job_id)
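The job splitting in makeMix.py is plain arithmetic; a quick, runnable check of `make_split_ranges` (numbers illustrative):

```python
import math

def make_split_ranges(n_elem, n_splits):  # copied from makeMix.py above
    split_size = n_elem / n_splits
    splits = []
    for i in range(n_splits + 1):
        x = math.floor(i * split_size) if i < n_splits else n_elem
        if x in splits:
            raise RuntimeError(f'Unable to split {n_elem} into {n_splits} ranges')
        splits.append(x)
    return splits

# n_splits + 1 monotone boundaries; job i owns the half-open range [splits[i], splits[i+1]).
assert make_split_ranges(10, 3) == [0, 3, 6, 10]
# e.g. job 1 of 3 creates splits[2] - splits[1] == 3 batches.
```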
diff --git a/Preprocessing/HLT/makeSpectrumHist.py b/Preprocessing/HLT/makeSpectrumHist.py
new file mode 100644
index 00000000000..503023df77b
--- /dev/null
+++ b/Preprocessing/HLT/makeSpectrumHist.py
@@ -0,0 +1,50 @@
+import os
+import sys
+
+if __name__ == "__main__":
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    base_dir = os.path.dirname(file_dir)
+    if base_dir not in sys.path:
+        sys.path.append(base_dir)
+    __package__ = os.path.split(file_dir)[-1]
+
+from .AnalysisTools import *
+
+import ROOT
+
+def make_hists(input_files, input_tree, output, x_bins, y_bins, tau_types):
+    input_files_vec = ListToVector(input_files, "string")
+    df = ROOT.RDataFrame(input_tree, input_files_vec)
+    df = ApplyCommonDefinitions(df=df)
+    x_bins_vec = ListToVector(x_bins, "double")
+    y_bins_vec = ListToVector(y_bins, "double")
+    model = ROOT.RDF.TH2DModel("spectrum", "spectrum", x_bins_vec.size() - 1, x_bins_vec.data(),
+                               y_bins_vec.size() - 1, y_bins_vec.data())
+    hists = {}
+    for tau_type in tau_types:
+        df_tau = df.Define("L1Tau_Gen_pt_sel", f"L1Tau_Gen_pt[L1Tau_Gen_type == {tau_type.value}]") \
+                   .Define("L1Tau_Gen_abs_eta_sel", f"L1Tau_Gen_abs_eta[L1Tau_Gen_type == {tau_type.value}]")
+        hists[tau_type] = df_tau.Histo2D(model, "L1Tau_Gen_pt_sel", "L1Tau_Gen_abs_eta_sel")
+    os.makedirs(os.path.dirname(output), exist_ok=True)
+    output_file = ROOT.TFile(output, "RECREATE")
+    for tau_type, hist in hists.items():
+        output_file.WriteTObject(hist.GetValue(), f'pt_eta_{tau_type.name}')
+    output_file.Close()
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Make spectrum histograms.')
+    parser.add_argument('--input', required=True, type=str, help="Input file(s)")
+    parser.add_argument('--output', required=True, type=str, help="output file")
+    parser.add_argument('--x-bins', required=True, type=str, help="x bins")
+    parser.add_argument('--y-bins', required=True, type=str, help="y bins")
+    parser.add_argument('--input-tree', required=False, type=str, default='Events', help="Input tree")
+    parser.add_argument('--tau-types', required=False, type=str, default='e,mu,tau,jet,displaced_tau', help="tau types")
+    args = parser.parse_args()
+
+    PrepareRootEnv()
+    x_bins = [float(x) for x in args.x_bins.split(',')]
+    y_bins = [float(y) for y in args.y_bins.split(',')]
+    tau_types = [TauType[x] for x in args.tau_types.split(',')]
+    make_hists(MakeFileList(args.input), args.input_tree, args.output, x_bins, y_bins, tau_types)
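The histogram names written here (`pt_eta_<tau_type>`) are what mixAnalyzer.py below looks up, and the binning must match the mix config. An illustrative call, with paths made up and edges copied from mix_setup.yaml further down:

```python
# Produce the spectrum file that mixAnalyzer.py expects under
# cfg['spectrum_root'] as '<input_name>.root' (paths are placeholders).
x_bins = [20.0, 30.0, 40.0, 60.0, 80.0, 100.0, 150.0, 250.0, 1000.0]  # pt edges from mix_setup.yaml
y_bins = [0.0, 1.5, 2.1]                                              # eta edges from mix_setup.yaml
make_hists(MakeFileList('/data2/Run3_HLT/prod_v3/TT'), 'Events',
           '/data2/Run3_HLT/prod_v3_spec/TT.root', x_bins, y_bins,
           [TauType.tau, TauType.jet])
```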
diff --git a/Preprocessing/HLT/mixAnalyzer.py b/Preprocessing/HLT/mixAnalyzer.py
new file mode 100644
index 00000000000..e7dd312ed5b
--- /dev/null
+++ b/Preprocessing/HLT/mixAnalyzer.py
@@ -0,0 +1,96 @@
+import os
+import sys
+import math
+import numpy as np
+import yaml
+
+if __name__ == "__main__":
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    base_dir = os.path.dirname(file_dir)
+    if base_dir not in sys.path:
+        sys.path.append(base_dir)
+    __package__ = os.path.split(file_dir)[-1]
+
+from .AnalysisTools import *
+from .MixBin import MixBin
+
+import ROOT
+ROOT.gROOT.SetBatch(True)
+ROOT.EnableThreadSafety()
+
+
+def GetBinContent(hist, pt, eta):
+    x_axis = hist.GetXaxis()
+    x_bin = x_axis.FindFixBin(pt)
+    y_axis = hist.GetYaxis()
+    y_bin = y_axis.FindFixBin(eta)
+    return hist.GetBinContent(x_bin, y_bin)
+
+def analyse_mix(cfg_file):
+    with open(cfg_file, 'r') as f:
+        cfg = yaml.safe_load(f)
+    mix_bins_grouped, batch_size = MixBin.Load(cfg)
+    n_steps = len(mix_bins_grouped)
+    mix_bins = []
+    for bins in mix_bins_grouped:
+        mix_bins.extend(bins)
+    print(f'Number of mix steps: {n_steps}')
+    print(f'Number of mix bins: {len(mix_bins)}')
+    print(f'Batch size: {batch_size}')
+
+    step_stats = {}
+    for tau_type in [ 'total', 'tau', 'displaced_tau', 'jet', 'e' ]:
+        step_stats[tau_type] = np.ones((len(mix_bins), 2)) * -1
+    n_taus = {}
+    n_taus_batch = {}
+    for step_idx, step in enumerate(mix_bins):
+        n_available = 0
+        for input in step.inputs:
+            input_path = os.path.join(cfg['spectrum_root'], f'{input}.root')
+            file = ROOT.TFile.Open(input_path, "READ")
+            hist_name = f'pt_eta_{step.tau_type}'
+            hist = file.Get(hist_name)
+            n_available += hist.GetBinContent(step.pt_bin + 1, step.eta_bin + 1)
+            file.Close()
+        n_taus[step.tau_type] = n_available + n_taus.get(step.tau_type, 0)
+        n_taus_batch[step.tau_type] = step.count + n_taus_batch.get(step.tau_type, 0)
+        n_batches = math.floor(n_available / step.count) if not step.allow_duplicates else float('inf')
+        step.n_available = n_available
+        for tau_type in [ 'total', step.tau_type ]:
+            step_stats[tau_type][step_idx, 0] = n_batches
+            step_stats[tau_type][step_idx, 1] = step_idx
+    finite_steps = step_stats['total'][np.isfinite(step_stats['total'][:, 0]), :]
+    step_idx = np.argmin(finite_steps[:, 0])
+    print(f'Total number of samples = {sum(n_taus.values())}: {n_taus}')
+    n_taus_active = { name: x * finite_steps[step_idx, 0] for name, x in n_taus_batch.items() }
+    print(f'Total number of used samples = {sum(n_taus_active.values())}: {n_taus_active}')
+    n_taus_frac = { name: n_taus_active[name] / x for name, x in n_taus.items() }
+    print(f'Used fraction: {n_taus_frac}')
+    print(f'Number of samples per batch: {n_taus_batch}')
+    n_taus_rel = { name: x / n_taus_batch['tau'] for name, x in n_taus_batch.items() }
+    print(f'Relative number of samples: {n_taus_rel}')
+    print('Bin with minimum number of batches:')
+    print(f'n_batches: {finite_steps[step_idx, 0]}')
+    mix_bins[int(finite_steps[step_idx, 1])].Print()
+    for name, stat in step_stats.items():
+        if name == 'total': continue
+        finite_steps = stat[np.isfinite(stat[:, 0]) & (stat[:, 0] >= 0), :]
+        if np.shape(finite_steps)[0] == 0: continue
+        step_idx = np.argmax(finite_steps[:, 0])
+        print(f'Bin with maximum number of batches for {name}:')
+        print(f'n_batches: {finite_steps[step_idx, 0]}')
+        mix_bins[int(finite_steps[step_idx, 1])].Print()
+    n_batches = cfg['n_batches']
+    print(f'Target number of batches: {n_batches}')
+    print('Bins with allowed duplicates:')
+    for step_idx in range(len(mix_bins)):
+        if mix_bins[step_idx].allow_duplicates:
+            n_repetitions = (n_batches * mix_bins[step_idx].count) / mix_bins[step_idx].n_available
+            print(f'step {mix_bins[step_idx].step_idx} bin {mix_bins[step_idx].bin_idx} '
+                  f'n_repetitions = {n_repetitions:.2f} selection: {mix_bins[step_idx].selection}')
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Analyse the mix composition defined by a mix config.')
+    parser.add_argument('--cfg', required=True, type=str, help="Mix config file")
+    args = parser.parse_args()
+
+    analyse_mix(args.cfg)
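The bottleneck arithmetic in `analyse_mix` is simple; with illustrative numbers:

```python
import math

# For a bin without duplicates: how many full batches its inputs can supply.
n_available = 123456   # taus of this type/pt/eta in the spectrum histograms (made up)
count = 28             # taus this bin contributes per batch (from the config)
print(math.floor(n_available / count))  # -> 4409 batches before this bin runs dry

# With allow_duplicates the same entries are reused; the analyzer instead reports
# n_repetitions = n_batches * count / n_available:
print(50000 * 1 / 20000)  # -> 2.5 average repetitions per entry (made-up numbers)
```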
diff --git a/Preprocessing/HLT/mix_setup.yaml b/Preprocessing/HLT/mix_setup.yaml
new file mode 100644
index 00000000000..1ed7f6dafb4
--- /dev/null
+++ b/Preprocessing/HLT/mix_setup.yaml
@@ -0,0 +1,112 @@
+input_root: /data2/Run3_HLT/prod_v3
+spectrum_root: /data2/Run3_HLT/prod_v3_spec
+n_batches: 50000
+tree_name: Events
+
+inputs:
+  Higgs:
+    - GluGluHToTauTau_M-125_ext1
+    - GluGlutoHHto2B2Tau_kl-0p00_kt-1p00_c2-0p00
+    - GluGlutoHHto2B2Tau_kl-0p00_kt-1p00_c2-1p00
+    - GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p35
+    - GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-3p00
+    - GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-m2p00
+    - GluGlutoHHto2B2Tau_kl-2p45_kt-1p00_c2-0p00
+    - VBFHHto2B2Tau_CV-1_C2V-1_C3-1
+    - VBFHHto2B2Tau_CV-1_C2V-1_C3-2
+    - VBFHHto2B2Tau_CV-1_C2V-2_C3-1
+    - VBFHToTauTau_M125_ext1
+  Zprime:
+    - ZprimeToTauTau_M-4000
+    - ZprimeToEE_M-6000
+  DY:
+    - DYTo2L_MLL-50
+    - DYTo2L_MLL-4to50
+  TT:
+    - TT
+    - TT_ext1
+  QCD:
+    - QCD_PT-600toInf
+    - QCD_PT-470to600
+    - QCD_PT-300to470
+    - QCD_PT-170to300
+    - QCD_PT-120to170
+    - QCD_PT-80to120
+    - QCD_PT-50to80
+    - QCD_PT-30to50
+  HNL:
+    - HNL_M-3_V-0.0276_tau
+    - HNL_M-5_V-0.0046_tau
+    - HNL_M-7_V-0.0013_tau
+  Staus_M_100:
+    - Staus_M_100_100mm
+    - Staus_M_100_10mm
+  Staus_M_300:
+    - Staus_M_300
+
+bin_selection: "
+  L1Tau_Gen_pt >= {pt_low} && L1Tau_Gen_pt < {pt_high}
+  && L1Tau_Gen_abs_eta >= {eta_low} && L1Tau_Gen_abs_eta < {eta_high}
+"
+
+bin_edges:
+  pt: [ 20.0, 30.0, 40.0, 60.0, 80.0, 100.0, 150.0, 250.0, 1000.0 ]
+  eta: [ 0.0, 1.5, 2.1 ]
+
+output:
+  - input_setups: [ Higgs, DY, TT ]
+    bins:
+      - tau_type: tau
+        eta_bin: 0
+        counts: [ 28, 28, 28, 16, 7, 0, 0, 0 ]
+      - tau_type: tau
+        eta_bin: 1
+        counts: [ 10, 9, 10, 4, 1, 0, 0, 0 ]
+  - input_setups: [ Higgs, DY, TT ]
+    bins:
+      - tau_type: e
+        eta_bin: 0
+        counts: [ 2, 3, 3, 2, 2, 0, 0, 0 ]
+      - tau_type: e
+        eta_bin: 1
+        counts: [ 1, 1, 2, 1, 1, 0, 0, 0 ]
+  - input_setups: [ Zprime, Higgs, DY, TT ]
+    bins:
+      - tau_type: tau
+        eta_bin: 0
+        counts: [ 0, 0, 0, 0, 0, 7, 3, 2 ]
+        allow_duplicates: true
+      - tau_type: tau
+        eta_bin: 1
+        counts: [ 0, 0, 0, 0, 0, 1, 1, 1 ]
+        allow_duplicates: true
+      - tau_type: e
+        eta_bin: 0
+        counts: [ 0, 0, 0, 0, 0, 1, 1, 1 ]
+        allow_duplicates: true
+      - tau_type: e
+        eta_bin: 1
+        counts: [ 0, 0, 0, 0, 0, 1, 1, 1 ]
+        allow_duplicates: true
+  - input_setups: [ Staus_M_100, Staus_M_300, HNL ]
+    bins:
+      - tau_type: displaced_tau
+        eta_bin: 0
+        counts: [ 1, 1, 1, 1, 1, 1, 1, 1 ]
+        allow_duplicates: true
+  - input_setups: [ TT ]
+    bins:
+      - tau_type: jet
+        eta_bin: 0
+        counts: [ 28, 28, 28, 16, 7, 7, 3, 2 ]
+      - tau_type: jet
+        eta_bin: 1
+        counts: [ 10, 9, 10, 4, 1, 1, 1, 1 ]
+  - input_setups: [ QCD ]
+    bins:
+      - tau_type: jet
+        eta_bin: 0
+        counts: [ 28, 28, 28, 16, 7, 7, 3, 2 ]
+      - tau_type: jet
+        eta_bin: 1
+        counts: [ 10, 9, 10, 4, 1, 1, 1, 1 ]
diff --git a/TauMLTools.code-workspace b/TauMLTools.code-workspace
index 876a1499c09..f2ccceefece 100644
--- a/TauMLTools.code-workspace
+++ b/TauMLTools.code-workspace
@@ -4,5 +4,14 @@
             "path": "."
         }
     ],
-    "settings": {}
+    "settings": {
+        "files.associations": {
+            "condition_variable": "cpp",
+            "*.ipp": "cpp",
+            "array": "cpp",
+            "bitset": "cpp",
+            "initializer_list": "cpp",
+            "regex": "cpp"
+        }
+    }
 }
\ No newline at end of file
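As a cross-check of mix_setup.yaml: each `counts` list is a per-batch tau allocation, so the grand total over all output steps is the batch size that `MixBin.Load` reports. Transcribing the counts from the config above:

```python
# Counts copied from mix_setup.yaml; comments give the originating bins.
counts = [
    [28, 28, 28, 16, 7, 0, 0, 0], [10, 9, 10, 4, 1, 0, 0, 0],  # tau, eta bins 0/1
    [2, 3, 3, 2, 2, 0, 0, 0], [1, 1, 2, 1, 1, 0, 0, 0],        # e, eta bins 0/1
    [0, 0, 0, 0, 0, 7, 3, 2], [0, 0, 0, 0, 0, 1, 1, 1],        # high-pt tau (with Zprime)
    [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 0, 1, 1, 1],        # high-pt e
    [1, 1, 1, 1, 1, 1, 1, 1],                                  # displaced_tau
    [28, 28, 28, 16, 7, 7, 3, 2], [10, 9, 10, 4, 1, 1, 1, 1],  # TT jets
    [28, 28, 28, 16, 7, 7, 3, 2], [10, 9, 10, 4, 1, 1, 1, 1],  # QCD jets
]
batch_size = sum(sum(c) for c in counts)
print(batch_size)  # -> 500 taus per batch
```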