-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add ONNXInference algorithm, use it to provide EcalEndcapNClusterPart…
…icleIDs (#1618) ### Briefly, what does this PR introduce? <img width="582" alt="image" src="https://github.com/user-attachments/assets/a8c0aa47-e64f-4008-9702-444c6992ca3d"> This provides a calorimeter PID capability using ONNXruntime inference engine. The inference algorithm is generalized into a separate algorithm which inputs and outputs `edm4eic::Tensor` values introduced by eic/EDM4eic#96. A set of weights needed to run this are provided in eic/epic-data#22, and is generated using a script from eic/detector_benchmarks#91. ### What kind of change does this PR introduce? - [ ] Bug fix (issue #__) - [x] New feature (issue #__) - [ ] Documentation update - [ ] Other: __ ### Please check if this PR fulfills the following: - [ ] Tests for the changes have been added - [ ] Documentation has been added / updated - [ ] Changes have been communicated to collaborators ### Does this PR introduce breaking changes? What changes might users need to make to their code? No ### Does this PR change default behavior? Yes
- Loading branch information
Showing
13 changed files
with
730 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// SPDX-License-Identifier: LGPL-3.0-or-later | ||
// Copyright (C) 2024 Dmitry Kalinkin | ||
|
||
#include <edm4eic/EDM4eicVersion.h> | ||
|
||
#if EDM4EIC_VERSION_MAJOR >= 8 | ||
#include <cstddef> | ||
#include <fmt/core.h> | ||
#include <gsl/pointers> | ||
#include <stdexcept> | ||
|
||
#include "CalorimeterParticleIDPostML.h" | ||
|
||
namespace eicrecon { | ||
|
||
void CalorimeterParticleIDPostML::init() { | ||
// Nothing | ||
} | ||
|
||
void CalorimeterParticleIDPostML::process( | ||
const CalorimeterParticleIDPostML::Input& input, | ||
const CalorimeterParticleIDPostML::Output& output) const { | ||
|
||
const auto [in_clusters, in_assocs, prediction_tensors] = input; | ||
auto [out_clusters, out_assocs, out_particle_ids] = output; | ||
|
||
if (prediction_tensors->size() != 1) { | ||
error("Expected to find a single tensor, found {}", prediction_tensors->size()); | ||
throw std::runtime_error(""); | ||
} | ||
edm4eic::Tensor prediction_tensor = (*prediction_tensors)[0]; | ||
|
||
if (prediction_tensor.shape_size() != 2) { | ||
error("Expected tensor rank to be 2, but it is {}", prediction_tensor.shape_size()); | ||
throw std::runtime_error(fmt::format("Expected tensor rank to be 2, but it is {}", prediction_tensor.shape_size())); | ||
} | ||
|
||
if (prediction_tensor.getShape(0) != in_clusters->size()) { | ||
error("Length mismatch between tensor's 0th axis and number of clusters: {} != {}", prediction_tensor.getShape(0), in_clusters->size()); | ||
throw std::runtime_error(fmt::format("Length mismatch between tensor's 0th axis and number of clusters: {} != {}", prediction_tensor.getShape(0), in_clusters->size())); | ||
} | ||
|
||
if (prediction_tensor.getShape(1) != 2) { | ||
error("Expected 2 values per cluster in the output tensor, got {}", prediction_tensor.getShape(0)); | ||
throw std::runtime_error(fmt::format("Expected 2 values per cluster in the output tensor, got {}", prediction_tensor.getShape(0))); | ||
} | ||
|
||
if (prediction_tensor.getElementType() != 1) { // 1 - float | ||
error("Expected a tensor of floats, but element type is {}", prediction_tensor.getElementType()); | ||
throw std::runtime_error(fmt::format("Expected a tensor of floats, but element type is {}", prediction_tensor.getElementType())); | ||
} | ||
|
||
for (size_t cluster_ix = 0; cluster_ix < in_clusters->size(); cluster_ix++) { | ||
edm4eic::Cluster in_cluster = (*in_clusters)[cluster_ix]; | ||
edm4eic::MutableCluster out_cluster = in_cluster.clone(); | ||
out_clusters->push_back(out_cluster); | ||
|
||
float prob_pion = prediction_tensor.getFloatData(cluster_ix * prediction_tensor.getShape(1) + 0); | ||
float prob_electron = prediction_tensor.getFloatData(cluster_ix * prediction_tensor.getShape(1) + 1); | ||
|
||
out_cluster.addToParticleIDs(out_particle_ids->create( | ||
0, // std::int32_t type | ||
211, // std::int32_t PDG | ||
0, // std::int32_t algorithmType | ||
prob_pion // float likelihood | ||
)); | ||
out_cluster.addToParticleIDs(out_particle_ids->create( | ||
0, // std::int32_t type | ||
11, // std::int32_t PDG | ||
0, // std::int32_t algorithmType | ||
prob_electron // float likelihood | ||
)); | ||
|
||
// propagate associations | ||
for (auto in_assoc : *in_assocs) { | ||
if (in_assoc.getRec() == in_cluster) { | ||
auto out_assoc = in_assoc.clone(); | ||
out_assoc.setRec(out_cluster); | ||
out_assocs->push_back(out_assoc); | ||
} | ||
} | ||
} | ||
} | ||
|
||
} // namespace eicrecon | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// SPDX-License-Identifier: LGPL-3.0-or-later | ||
// Copyright (C) 2024 Dmitry Kalinkin | ||
|
||
#pragma once | ||
|
||
#include <algorithms/algorithm.h> | ||
#include <edm4eic/ClusterCollection.h> | ||
#include <edm4eic/MCRecoClusterParticleAssociationCollection.h> | ||
#include <edm4eic/TensorCollection.h> | ||
#include <edm4hep/ParticleIDCollection.h> | ||
#include <optional> | ||
#include <string> | ||
#include <string_view> | ||
|
||
#include "algorithms/interfaces/WithPodConfig.h" | ||
|
||
namespace eicrecon { | ||
|
||
using CalorimeterParticleIDPostMLAlgorithm = | ||
algorithms::Algorithm< | ||
algorithms::Input<edm4eic::ClusterCollection, | ||
std::optional<edm4eic::MCRecoClusterParticleAssociationCollection>, | ||
edm4eic::TensorCollection>, | ||
algorithms::Output<edm4eic::ClusterCollection, | ||
std::optional<edm4eic::MCRecoClusterParticleAssociationCollection>, | ||
edm4hep::ParticleIDCollection> | ||
>; | ||
|
||
class CalorimeterParticleIDPostML : public CalorimeterParticleIDPostMLAlgorithm, | ||
public WithPodConfig<NoConfig> { | ||
|
||
public: | ||
CalorimeterParticleIDPostML(std::string_view name) | ||
: CalorimeterParticleIDPostMLAlgorithm{name, | ||
{"inputClusters", "inputClusterAssociations", "inputPredictionsTensor"}, | ||
{"outputClusters", "outputClusterAssociations", "outputParticleIDs"}, | ||
""} { | ||
} | ||
|
||
void init() final; | ||
void process(const Input&, const Output&) const final; | ||
}; | ||
|
||
} // namespace eicrecon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// SPDX-License-Identifier: LGPL-3.0-or-later | ||
// Copyright (C) 2024 Dmitry Kalinkin | ||
|
||
#include <edm4eic/EDM4eicVersion.h> | ||
|
||
#if EDM4EIC_VERSION_MAJOR >= 8 | ||
#include <cstddef> | ||
#include <edm4hep/MCParticle.h> | ||
#include <edm4hep/Vector3f.h> | ||
#include <edm4hep/utils/vector_utils.h> | ||
#include <cstdint> | ||
#include <stdexcept> | ||
#include <fmt/core.h> | ||
#include <gsl/pointers> | ||
|
||
#include "CalorimeterParticleIDPreML.h" | ||
|
||
namespace eicrecon { | ||
|
||
void CalorimeterParticleIDPreML::init() { | ||
// Nothing | ||
} | ||
|
||
void CalorimeterParticleIDPreML::process( | ||
const CalorimeterParticleIDPreML::Input& input, | ||
const CalorimeterParticleIDPreML::Output& output) const { | ||
|
||
const auto [clusters, cluster_assocs] = input; | ||
auto [feature_tensors, target_tensors] = output; | ||
|
||
edm4eic::MutableTensor feature_tensor = feature_tensors->create(); | ||
feature_tensor.addToShape(clusters->size()); | ||
feature_tensor.addToShape(11); // p, E/p, azimuthal, polar, 7 shape parameters | ||
feature_tensor.setElementType(1); // 1 - float | ||
|
||
edm4eic::MutableTensor target_tensor; | ||
if (cluster_assocs) { | ||
target_tensor = target_tensors->create(); | ||
target_tensor.addToShape(clusters->size()); | ||
target_tensor.addToShape(2); // is electron, is hadron | ||
target_tensor.setElementType(7); // 7 - int64 | ||
} | ||
|
||
for (edm4eic::Cluster cluster : *clusters) { | ||
double momentum; | ||
{ | ||
// FIXME: use track momentum once matching to tracks becomes available | ||
edm4eic::MCRecoClusterParticleAssociation best_assoc; | ||
for (auto assoc : *cluster_assocs) { | ||
if (assoc.getRec() == cluster) { | ||
if ((not best_assoc.isAvailable()) || (assoc.getWeight() > best_assoc.getWeight())) { | ||
best_assoc = assoc; | ||
} | ||
} | ||
} | ||
if (best_assoc.isAvailable()) { | ||
momentum = edm4hep::utils::magnitude(best_assoc.getSim().getMomentum()); | ||
} else { | ||
warning("Can't find association for cluster. Skipping..."); | ||
continue; | ||
} | ||
} | ||
|
||
feature_tensor.addToFloatData(momentum); | ||
feature_tensor.addToFloatData(cluster.getEnergy() / momentum); | ||
auto pos = cluster.getPosition(); | ||
feature_tensor.addToFloatData(edm4hep::utils::anglePolar(pos)); | ||
feature_tensor.addToFloatData(edm4hep::utils::angleAzimuthal(pos)); | ||
for (int par_ix = 0; par_ix < cluster.shapeParameters_size(); par_ix++) { | ||
feature_tensor.addToFloatData(cluster.getShapeParameters(par_ix)); | ||
} | ||
|
||
if (cluster_assocs) { | ||
edm4eic::MCRecoClusterParticleAssociation best_assoc; | ||
for (auto assoc : *cluster_assocs) { | ||
if (assoc.getRec() == cluster) { | ||
if ((not best_assoc.isAvailable()) || (assoc.getWeight() > best_assoc.getWeight())) { | ||
best_assoc = assoc; | ||
} | ||
} | ||
} | ||
int64_t is_electron = 0, is_pion = 0; | ||
if (best_assoc.isAvailable()) { | ||
is_electron = best_assoc.getSim().getPDG() == 11; | ||
is_pion = best_assoc.getSim().getPDG() != 11; | ||
} | ||
target_tensor.addToInt64Data(is_pion); | ||
target_tensor.addToInt64Data(is_electron); | ||
} | ||
} | ||
|
||
size_t expected_num_entries = feature_tensor.getShape(0) * feature_tensor.getShape(1); | ||
if (feature_tensor.floatData_size() != expected_num_entries) { | ||
error("Inconsistent output tensor shape and element count: {} != {}", feature_tensor.floatData_size(), expected_num_entries); | ||
throw std::runtime_error(fmt::format("Inconsistent output tensor shape and element count: {} != {}", feature_tensor.floatData_size(), expected_num_entries)); | ||
} | ||
} | ||
|
||
} // namespace eicrecon | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// SPDX-License-Identifier: LGPL-3.0-or-later | ||
// Copyright (C) 2024 Dmitry Kalinkin | ||
|
||
#pragma once | ||
|
||
#include <algorithms/algorithm.h> | ||
#include <edm4eic/ClusterCollection.h> | ||
#include <edm4eic/MCRecoClusterParticleAssociationCollection.h> | ||
#include <edm4eic/TensorCollection.h> | ||
#include <optional> | ||
#include <string> | ||
#include <string_view> | ||
|
||
#include "algorithms/interfaces/WithPodConfig.h" | ||
|
||
namespace eicrecon { | ||
|
||
using CalorimeterParticleIDPreMLAlgorithm = | ||
algorithms::Algorithm<algorithms::Input<edm4eic::ClusterCollection, | ||
std::optional<edm4eic::MCRecoClusterParticleAssociationCollection>>, | ||
algorithms::Output<edm4eic::TensorCollection, | ||
std::optional<edm4eic::TensorCollection>>>; | ||
|
||
class CalorimeterParticleIDPreML : public CalorimeterParticleIDPreMLAlgorithm, | ||
public WithPodConfig<NoConfig> { | ||
|
||
public: | ||
CalorimeterParticleIDPreML(std::string_view name) | ||
: CalorimeterParticleIDPreMLAlgorithm{name, | ||
{"inputClusters"}, | ||
{"outputFeatureTensor", "outputTargetTensor"}, | ||
""} { | ||
} | ||
|
||
void init() final; | ||
void process(const Input&, const Output&) const final; | ||
}; | ||
|
||
} // namespace eicrecon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.