Skip to content

Commit

Permalink
Add an algorithm for merging collections (#209)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmcarcell authored Jul 19, 2024
1 parent a3380b2 commit 724e2c9
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 11 deletions.
143 changes: 143 additions & 0 deletions k4FWCore/components/CollectionMerger.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Copyright (c) 2014-2024 Key4hep-Project.
*
* This file is part of Key4hep.
* See https://key4hep.github.io/key4hep-doc/ for further info.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/** @class CollectionMerger
*
* Merges collections of the same type into a single collection.
* By default (Copy property set to false) the output collection is created as a subset collection that
* references the elements of the input collections. If Copy is set to true, the elements of the input
* collections are cloned into a new, regular output collection instead.
*
* In both cases, the relations in the new objects point to the original objects. This means that collections
* having relations to objects in different collections will need these collections to be present to fully
* work. If the collections are not present (for example, after dropping them with the output commands)
* and these relations are used, the program will almost certainly crash. An example use of this algorithm
* is to merge several collections so that a downstream algorithm can use all of them at the same time.
*
* The collections to be merged are specified in the InputCollections property, which is a list of collection names.
* The output collection is specified in the OutputCollection property.
*/

#include "edm4hep/edm4hep.h"

#include "k4FWCore/Transformer.h"

#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

/// Functional transformer that concatenates an arbitrary number of input collections,
/// all of the same EDM4hep type, into one output collection.
///
/// The concrete collection type is only known at run time (inputs arrive as
/// std::shared_ptr<podio::CollectionBase>), so the work is dispatched through a table
/// that maps the collection type name to the matching mergeCollections<T> instantiation.
struct CollectionMerger final : k4FWCore::Transformer<std::shared_ptr<podio::CollectionBase>(
                                    const std::vector<const std::shared_ptr<podio::CollectionBase>*>&)> {
  /// Declares the InputCollections / OutputCollection keys and fills the dispatch table.
  CollectionMerger(const std::string& name, ISvcLocator* svcLoc)
      : Transformer(name, svcLoc, {KeyValues("InputCollections", {"MCParticles"})},
                    {KeyValues("OutputCollection", {"NewMCParticles"})}) {
    // The dispatch table is not filled when the first command-line argument contains "genconf"
    // (presumably while Gaudi generates the configurables — TODO confirm).
    if (System::cmdLineArgs()[0].find("genconf") != std::string::npos) {
      return;
    }
    m_map["edm4hep::MCParticleCollection"]    = &CollectionMerger::mergeCollections<edm4hep::MCParticleCollection>;
    m_map["edm4hep::SimTrackerHitCollection"] = &CollectionMerger::mergeCollections<edm4hep::SimTrackerHitCollection>;
    m_map["edm4hep::CaloHitContributionCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::CaloHitContributionCollection>;
    m_map["edm4hep::SimCalorimeterHitCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::SimCalorimeterHitCollection>;
    m_map["edm4hep::RawCalorimeterHitCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::RawCalorimeterHitCollection>;
    m_map["edm4hep::CalorimeterHitCollection"] = &CollectionMerger::mergeCollections<edm4hep::CalorimeterHitCollection>;
    m_map["edm4hep::ParticleIDCollection"]     = &CollectionMerger::mergeCollections<edm4hep::ParticleIDCollection>;
    m_map["edm4hep::ClusterCollection"]        = &CollectionMerger::mergeCollections<edm4hep::ClusterCollection>;
    m_map["edm4hep::TrackerHit3DCollection"]   = &CollectionMerger::mergeCollections<edm4hep::TrackerHit3DCollection>;
    m_map["edm4hep::TrackerHitPlaneCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::TrackerHitPlaneCollection>;
    m_map["edm4hep::RawTimeSeriesCollection"] = &CollectionMerger::mergeCollections<edm4hep::RawTimeSeriesCollection>;
    m_map["edm4hep::TrackCollection"]         = &CollectionMerger::mergeCollections<edm4hep::TrackCollection>;
    m_map["edm4hep::VertexCollection"]        = &CollectionMerger::mergeCollections<edm4hep::VertexCollection>;
    m_map["edm4hep::ReconstructedParticleCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::ReconstructedParticleCollection>;
    m_map["edm4hep::MCRecoParticleAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoParticleAssociationCollection>;
    m_map["edm4hep::MCRecoCaloAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoCaloAssociationCollection>;
    m_map["edm4hep::MCRecoTrackerAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoTrackerAssociationCollection>;
    m_map["edm4hep::MCRecoCaloParticleAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoCaloParticleAssociationCollection>;
    m_map["edm4hep::MCRecoClusterParticleAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoClusterParticleAssociationCollection>;
    m_map["edm4hep::MCRecoTrackParticleAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::MCRecoTrackParticleAssociationCollection>;
    m_map["edm4hep::RecoParticleVertexAssociationCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::RecoParticleVertexAssociationCollection>;
    m_map["edm4hep::TimeSeriesCollection"] = &CollectionMerger::mergeCollections<edm4hep::TimeSeriesCollection>;
    m_map["edm4hep::RecDqdxCollection"]    = &CollectionMerger::mergeCollections<edm4hep::RecDqdxCollection>;
    m_map["edm4hep::GeneratorEventParametersCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::GeneratorEventParametersCollection>;
    m_map["edm4hep::GeneratorPdfInfoCollection"] =
        &CollectionMerger::mergeCollections<edm4hep::GeneratorPdfInfoCollection>;
  }

  /// Merges all input collections, in the order given, into a single collection.
  ///
  /// @param input pointers to the type-erased input collections
  /// @return the merged collection; a null pointer if @p input is empty, because the
  ///         output is only created once the type of the first input is known
  /// @throws std::runtime_error if the inputs do not all have the same collection type
  /// @throws std::out_of_range if no merge function is registered for the collection type
  std::shared_ptr<podio::CollectionBase> operator()(
      const std::vector<const std::shared_ptr<podio::CollectionBase>*>& input) const override {
    std::shared_ptr<podio::CollectionBase> ret;
    debug() << "Merging " << input.size() << " collections" << endmsg;
    std::string_view type = "";
    for (const auto& coll : input) {
      const std::string_view collType = (*coll)->getTypeName();
      debug() << "Merging collection of type " << collType << " with " << (*coll)->size() << " elements" << endmsg;
      if (type.empty()) {
        type = collType;
      } else if (type != collType) {
        throw std::runtime_error("Different collection types are not supported");
      }
      // Look the handler up explicitly so that an unsupported type produces an error
      // message that names the offending type instead of a bare map::at failure.
      const auto handler = m_map.find(collType);
      if (handler == m_map.end()) {
        throw std::out_of_range("CollectionMerger: merging collections of type '" + std::string(collType) +
                                "' is not supported");
      }
      (this->*(handler->second))(*coll, ret);
    }
    return ret;
  }

private:
  /// Signature shared by every mergeCollections<T> instantiation stored in the dispatch table.
  using MergeType = void (CollectionMerger::*)(const std::shared_ptr<podio::CollectionBase>&,
                                               std::shared_ptr<podio::CollectionBase>&) const;
  /// Dispatch table: collection type name -> merge function for that type.
  std::map<std::string_view, MergeType> m_map;
  Gaudi::Property<bool>                 m_copy{this, "Copy", false,
                               "Copy the elements of the collections instead of creating a subset collection"};

  /// Appends the elements of @p source to @p ret, creating @p ret on first use.
  ///
  /// When the Copy property is false the output is flagged as a subset collection and the
  /// source elements are added directly; otherwise a clone of each element is added.
  ///
  /// @tparam T concrete collection type of both @p source and @p ret
  template <typename T>
  void mergeCollections(const std::shared_ptr<podio::CollectionBase>& source,
                        std::shared_ptr<podio::CollectionBase>&       ret) const {
    if (!ret) {
      ret = std::make_shared<T>();
      if (!m_copy) {
        ret->setSubsetCollection();
      }
    }
    // Plain reference casts: no additional owners are needed while merging.
    auto& merged     = *static_cast<T*>(ret.get());
    auto& sourceColl = *static_cast<T*>(source.get());
    if (m_copy) {
      for (const auto& elem : sourceColl) {
        merged.push_back(elem.clone());
      }
    } else {
      for (const auto& elem : sourceColl) {
        merged.push_back(elem);
      }
    }
  }
};

DECLARE_COMPONENT(CollectionMerger)
27 changes: 22 additions & 5 deletions k4FWCore/include/k4FWCore/FunctionalUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,14 @@ namespace k4FWCore {
namespace details {

// This function will be used to modify std::shared_ptr<podio::CollectionBase> to the actual collection type
template <typename T, typename P> const auto& maybeTransformToEDM4hep(const P& arg) { return arg; }
template <typename T, typename P> auto maybeTransformToEDM4hep(P& arg) { return arg; }

// Overload selected when P is exactly the type named in the constraint below; the argument is
// handed back unchanged.
// NOTE(review): the constraint names std::vector<std::string, std::shared_ptr<podio::CollectionBase>>,
// i.e. a vector whose second (allocator) template argument is a shared_ptr. This looks like it was
// meant to be a map from collection name to collection — confirm the intended type.
// NOTE(review): the return type is plain `auto`, so the argument is returned by value (a copy).
template <typename T, typename P>
requires std::same_as<P, std::vector<std::string, std::shared_ptr<podio::CollectionBase>>>
auto maybeTransformToEDM4hep(P& arg) {
return arg;
}

template <typename T, typename P>
requires std::is_base_of_v<podio::CollectionBase, P>
const auto& maybeTransformToEDM4hep(P* arg) {
Expand All @@ -69,7 +76,8 @@ namespace k4FWCore {
template <typename T> struct isVectorLike : std::false_type {};

template <typename Value>
requires std::is_base_of_v<podio::CollectionBase, std::remove_cvref_t<Value>>
requires std::is_base_of_v<podio::CollectionBase, std::remove_cvref_t<Value>> ||
std::is_same_v<std::shared_ptr<podio::CollectionBase>, std::remove_cvref_t<Value>>
struct isVectorLike<std::vector<Value*>> : std::true_type {};

template <typename Value>
Expand All @@ -96,6 +104,12 @@ namespace k4FWCore {
return std::shared_ptr<podio::CollectionBase>(std::make_shared<T>(std::move(arg)));
}

// Overload for values that already are a std::shared_ptr<podio::CollectionBase>: nothing has to be
// wrapped, the pointer is simply moved into the returned value.
// Since T has to be exactly the shared_ptr type (no reference, no const), this overload only
// participates for non-const rvalue arguments.
template <typename T>
requires std::is_same_v<T, std::shared_ptr<podio::CollectionBase>>
auto convertToSharedPtr(T&& arg) {
return std::move(arg);
}

template <typename... In> struct filter_evtcontext_tt {
static_assert(!std::disjunction_v<std::is_same<EventContext, In>...>,
"EventContext can only appear as first argument");
Expand Down Expand Up @@ -127,11 +141,14 @@ namespace k4FWCore {
std::remove_pointer_t<typename std::tuple_element_t<Index, std::tuple<In...>>::value_type>;
auto inputMap = std::vector<const EDM4hepType*>();
for (auto& handle : std::get<Index>(handles)) {
auto in = get(handle, thisClass, Gaudi::Hive::currentContext());
inputMap.push_back(static_cast<EDM4hepType*>(in.get()));
if constexpr (std::is_same_v<EDM4hepType, const std::shared_ptr<podio::CollectionBase>>) {
inputMap.push_back(&get(handle, thisClass, Gaudi::Hive::currentContext()));
} else {
auto in = get(handle, thisClass, Gaudi::Hive::currentContext());
inputMap.push_back(static_cast<EDM4hepType*>(in.get()));
}
}
std::get<Index>(inputTuple) = std::move(inputMap);

} else {
try {
auto in = get(std::get<Index>(handles)[0], thisClass, Gaudi::Hive::currentContext());
Expand Down
10 changes: 5 additions & 5 deletions k4FWCore/include/k4FWCore/Transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ namespace k4FWCore {
: Gaudi::Functional::details::DataHandleMixin<std::tuple<>, std::tuple<>, Traits_> {
using Gaudi::Functional::details::DataHandleMixin<std::tuple<>, std::tuple<>, Traits_>::DataHandleMixin;

static_assert(
((std::is_base_of_v<podio::CollectionBase, In> || isVectorLike_v<In>)&&...),
"Transformer and Producer input types must be EDM4hep collections or vectors of collection pointers");
static_assert((std::is_base_of_v<podio::CollectionBase, Out> || isVectorLike_v<Out>),
"Transformer and Producer output types must be EDM4hep collections or vectors of collections");
static_assert(((std::is_base_of_v<podio::CollectionBase, In> || isVectorLike_v<In>)&&...),
"Transformer and Producer input types must be EDM4hep collections or maps to collections");
static_assert((std::is_base_of_v<podio::CollectionBase, Out> || isVectorLike_v<Out> ||
std::is_same_v<std::shared_ptr<podio::CollectionBase>, Out>),
"Transformer and Producer output types must be EDM4hep collections or maps to collections");

template <typename T>
using InputHandle_t = Gaudi::Functional::details::InputHandle_t<Traits_, std::remove_pointer_t<T>>;
Expand Down
4 changes: 3 additions & 1 deletion test/k4FWCoreTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,11 @@ add_test_with_env(FunctionalTransformerRuntimeCollections options/ExampleFunctio
add_test_with_env(FunctionalTransformerRuntimeEmpty options/ExampleFunctionalTransformerRuntimeEmpty.py)
add_test_with_env(FunctionalTransformerRuntimeCollectionsMultiple options/ExampleFunctionalTransformerRuntimeCollectionsMultiple.py)
add_test_with_env(FunctionalTransformerHist options/ExampleFunctionalTransformerHist.py)
add_test_with_env(FunctionalCollectionMerger options/ExampleFunctionalCollectionMerger.py)

add_test(NAME FunctionalCheckFiles COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/options/CheckOutputFiles.py)
set_tests_properties(FunctionalCheckFiles PROPERTIES DEPENDS "FunctionalFile;FunctionalMTFile;FunctionalMultipleFile;FunctionalOutputCommands;FunctionalProducerAbsolutePath;FunctionalTransformerRuntimeEmpty;FunctionalMix;FunctionalMixIOSvc;FunctionalTransformerHist")
set_tests_properties(FunctionalCheckFiles PROPERTIES DEPENDS "FunctionalFile;FunctionalMTFile;FunctionalMultipleFile;FunctionalOutputCommands;FunctionalProducerAbsolutePath;FunctionalTransformerRuntimeEmpty;FunctionalMix;FunctionalMixIOSvc;FunctionalTransformerHist;FunctionalCollectionMerger")

# Do this after checking the files not to overwrite them
add_test_with_env(FunctionalFile_toolong options/ExampleFunctionalFile.py -n 999 PROPERTIES DEPENDS FunctionalCheckFiles PASS_REGULAR_EXPRESSION
"Application Manager Terminated successfully with a user requested ScheduledStop")
Expand Down
11 changes: 11 additions & 0 deletions test/k4FWCoreTest/options/CheckOutputFiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,14 @@ def check_collections(filename, names):
raise RuntimeError(
"Directory structure does not match expected for functional_transformer_hist.root"
)

# Output of the collection merger example: verify which collections were written
# and how many elements the merged collection holds in the first event
merged_file = "functional_merged_collections.root"
expected_names = ["MCParticles1", "MCParticles2", "MCParticles3", "NewMCParticles", "SimTrackerHits"]
check_collections(merged_file, expected_names)

podio_reader = podio.root_io.Reader(merged_file)
frames = podio_reader.get("events")
ev = frames[0]
if len(ev.get("NewMCParticles")) != 4:
    raise RuntimeError(f"Expected 4 NewMCParticles but got {len(ev.get('NewMCParticles'))}")
66 changes: 66 additions & 0 deletions test/k4FWCoreTest/options/ExampleFunctionalCollectionMerger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#
# Copyright (c) 2014-2024 Key4hep-Project.
#
# This file is part of Key4hep.
# See https://key4hep.github.io/key4hep-doc/ for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This is an example reading collections from a file, producing an additional one
# and merging all of them into a single collection with the CollectionMerger algorithm

from Gaudi.Configuration import INFO, DEBUG
from Configurables import CollectionMerger
from Configurables import EventDataSvc
from k4FWCore import ApplicationMgr, IOSvc

from Configurables import ExampleFunctionalProducer

# I/O: read the output of the multiple-collection producer and keep only the
# inputs of the merge, the merged result and the tracker hits
svc = IOSvc("IOSvc")
svc.input = "functional_producer_multiple.root"
svc.output = "functional_merged_collections.root"
output_commands = [
    "drop *",
    "keep MCParticles1",
    "keep MCParticles2",
    "keep MCParticles3",
    "keep NewMCParticles",
    "keep SimTrackerHits",
]
svc.outputCommands = output_commands

# Produces a third MCParticle collection in addition to the ones read from the file
particle_producer = ExampleFunctionalProducer(OutputCollection=["MCParticles3"])

# List of collections to concatenate
collections_to_merge = ["MCParticles2", "MCParticles1", "MCParticles3"]
# Name of the single output collection
merged_collection = ["NewMCParticles"]

merger = CollectionMerger(
    "CollectionMerger",
    InputCollections=collections_to_merge,
    OutputCollection=merged_collection,
    OutputLevel=DEBUG,
)

# If we want to copy instead of creating a subset collection
# merger.Copy = True

top_algorithms = [particle_producer, merger]
extra_services = [EventDataSvc("EventDataSvc")]

mgr = ApplicationMgr(
    TopAlg=top_algorithms,
    EvtSel="NONE",
    EvtMax=-1,
    ExtSvc=extra_services,
    OutputLevel=INFO,
)

0 comments on commit 724e2c9

Please sign in to comment.