diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3519d592..add9b84a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,7 +151,9 @@ add_compile_flags_if_supported(-Wno-maybe-uninitialized)
 # Check MPI status
 # if MPI_CXX_COMPILER is not empty and XACC_ENABLE_MPI is set
 # turn MPI_ENABLED on
-if(NOT MPI_CXX_COMPILER STREQUAL "" AND XACC_ENABLE_MPI)
+# Update: we don't need to be given the compiler path here;
+# find_package(MPI) populates MPI_CXX_COMPILER when MPI is found
+if(XACC_ENABLE_MPI)
   find_package(MPI)
   if(MPI_FOUND)
diff --git a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
index 604ec2e17..c5013c24e 100644
--- a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
+++ b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
@@ -4,14 +4,25 @@
 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
 Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
 **/
 #include "MPIProxy.hpp"
-
-#include "mpi.h"
-
 #include
-
 #include
 #include
 
+template <>
+MPI_Datatype MPIDataTypeResolver<int>::getMPIDatatype() {
+  return MPI_INT;
+}
+
+template <>
+MPI_Datatype MPIDataTypeResolver<double>::getMPIDatatype() {
+  return MPI_DOUBLE;
+}
+
+template <>
+MPI_Datatype MPIDataTypeResolver<char>::getMPIDatatype() {
+  return MPI_CHAR;
+}
+
 namespace xacc {
 
 //Temporary buffers:
diff --git a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
index eaffa06ff..5cbb6889e 100644
--- a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
+++ b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
@@ -9,6 +9,13 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
 **/
 #include
 #include
 #include
+#include "mpi.h"
+
+template <typename T>
+class MPIDataTypeResolver {
+public:
+  MPI_Datatype getMPIDatatype();
+};
 
 namespace xacc {
 
@@ -142,6 +149,49 @@ class ProcessGroup {
      different MPI processes, thus putting them into disjoint subgroups.
 **/
  std::shared_ptr<ProcessGroup> split(int my_subgroup) const;
+
+  // some useful wrappers
+
+  // These could be folded into one function, but we don't
+  // want to abuse template specialization here.
+  // this broadcasts a single element (int/char/double);
+  // taken by reference so that non-root ranks receive the value
+  template <typename T>
+  void broadcast(T &element) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Bcast(&element, 1, mpiType, 0,
+              this->getMPICommProxy().getRef<MPI_Comm>());
+  }
+
+  // this broadcasts a vector
+  template <typename T>
+  void broadcast(std::vector<T> &vec) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Bcast(vec.data(), vec.size(), mpiType, 0,
+              this->getMPICommProxy().getRef<MPI_Comm>());
+  }
+
+  // this Allgatherv's the content of local vectors
+  // into a global vector
+  template <typename T>
+  void allGatherv(std::vector<T> &local,
+                  std::vector<T> &global,
+                  std::vector<int> &nLocalData,
+                  std::vector<int> &shift) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Allgatherv(local.data(), local.size(), mpiType,
+                   global.data(), nLocalData.data(),
+                   shift.data(), mpiType,
+                   this->getMPICommProxy().getRef<MPI_Comm>());
+  }
+
 protected:
 
  std::vector<unsigned int> process_ranks_; //global ranks of the MPI processes forming the process group
diff --git a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
index 92c896e13..dd123d737 100644
--- a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
+++ b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
@@ -14,13 +14,32 @@
 #include "hpc_virt_decorator.hpp"
 #include "InstructionIterator.hpp"
 #include "Utils.hpp"
-#include "xacc.hpp"
+#include "xacc_service.hpp"
+#include "TearDown.hpp"
 #include
 
+namespace {
+static bool hpcVirtDecoratorInitializedMpi = false;
+}
+
 namespace xacc {
 namespace quantum {
 
 void HPCVirtDecorator::initialize(const HeterogeneousMap &params) {
+
+  if (!qpuComm) {
+    // Initializing MPI here
+    int provided, isMPIInitialized;
+    MPI_Initialized(&isMPIInitialized);
+    if (!isMPIInitialized) {
+      MPI_Init_thread(0, NULL, MPI_THREAD_MULTIPLE, &provided);
+      hpcVirtDecoratorInitializedMpi = true;
+      if (provided != MPI_THREAD_MULTIPLE) {
+        xacc::warning("MPI_THREAD_MULTIPLE not provided.");
+      }
+    }
+  }
+
   decoratedAccelerator->initialize(params);
 
   if (params.keyExists("n-virtual-qpus")) {
@@ -34,6 +53,29 @@ void HPCVirtDecorator::initialize(const HeterogeneousMap &params) {
     }
     n_virtual_qpus = params.get<int>("n-virtual-qpus");
   }
+
+  shots = -1;
+  if (params.keyExists("shots")) {
+    shots = params.get<int>("shots");
+    if (shots < 1) {
+      xacc::error("Invalid 'shots' parameter.");
+    }
+  }
+
+  isVqeMode = false;
+  if (params.keyExists("vqe-mode")) {
+    isVqeMode = params.get<bool>("vqe-mode");
+  } else {
+    isVqeMode = (shots < 1);
+  }
+
+  if ((shots >= 1 && isVqeMode) || (shots < 1 && !isVqeMode)) {
+    xacc::error("Please choose between shot-based simulation or VQE mode.");
+  } else if (shots >= 1) {
+    xacc::info("Running shot-based simulation.");
+  } else {
+    xacc::info("Enabling VQE mode.");
+  }
 }
 
 void HPCVirtDecorator::updateConfiguration(const HeterogeneousMap &config) {
@@ -109,8 +151,14 @@ void HPCVirtDecorator::execute(
     // Give that sub communicator to the accelerator
     void *qpu_comm_ptr =
         reinterpret_cast<void *>(qpuComm->getMPICommProxy().getRef<MPI_Comm>());
-    decoratedAccelerator->updateConfiguration(
-        {{"mpi-communicator", qpu_comm_ptr}});
+
+    // this enables shot-based simulation
+    HeterogeneousMap properties;
+    properties.insert("mpi-communicator",
+                      qpu_comm_ptr);
+    if (!isVqeMode) {
+      properties.insert("shots", shots);
+    }
+    decoratedAccelerator->updateConfiguration(properties);
 
     // get the number of sub-communicators
     // Everybody split the CompositeInstructions vector into n_virtual_qpu
@@ -146,16 +194,22 @@ void HPCVirtDecorator::execute(
   }
 
   // broadcast the total number of children
-  MPI_Bcast(&nGlobalChildren, 1, MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(nGlobalChildren);
 
   // broadcast the number of children in each communicator
-  MPI_Bcast(nLocalChildren.data(), nLocalChildren.size(), MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(nLocalChildren);
 
-  // get expectation values and the size of the key of each child buffer
-  std::vector<double> globalExpVals(nGlobalChildren);
+  // get expectation values/bitstrings and the size of the key of each child
+  // buffer
   std::vector<int> globalKeySizes(nGlobalChildren);
+  std::vector<int> globalNumberBitStrings;
+  std::vector<double> globalExpVals;
+  if (isVqeMode) {
+    globalExpVals.resize(nGlobalChildren);
+  } else {
+    globalNumberBitStrings.resize(nGlobalChildren);
+  }
+
   if (world_rank == qpuComm->getProcessRanks()[0]) {
 
     // get displacements for the keys in each comm
@@ -167,47 +221,85 @@ void HPCVirtDecorator::execute(
 
     // get size of each key in the communicator
     std::vector<double> localExpVals;
-    std::vector<int> localKeySizes;
+    std::vector<int> localKeySizes, localNumberBitStrings;
     for (auto child : my_buffer->getChildren()) {
       localKeySizes.push_back(child->name().size());
-      localExpVals.push_back(child->getExpectationValueZ());
-    }
-
-    // gather all expectation values
-    MPI_Allgatherv(localExpVals.data(), localExpVals.size(), MPI_DOUBLE,
-                   globalExpVals.data(), nLocalChildren.data(),
-                   nKeyShift.data(), MPI_DOUBLE,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
+
+      if (isVqeMode) {
+        localExpVals.push_back(child->getExpectationValueZ());
+      } else {
+        localNumberBitStrings.push_back(child->getMeasurementCounts().size());
+      }
+    }
 
     // gather the size of each child key
-    MPI_Allgatherv(localKeySizes.data(), localKeySizes.size(), MPI_INT,
-                   globalKeySizes.data(), nLocalChildren.data(),
-                   nKeyShift.data(), MPI_INT,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
+    zeroRanksComm->allGatherv(localKeySizes, globalKeySizes, nLocalChildren,
+                              nKeyShift);
+
+    if (isVqeMode) {
+      // gather all expectation values
+      zeroRanksComm->allGatherv(localExpVals, globalExpVals, nLocalChildren,
+                                nKeyShift);
+    } else {
+      // gather all bitstrings
+      zeroRanksComm->allGatherv(localNumberBitStrings, globalNumberBitStrings,
+                                nLocalChildren, nKeyShift);
+    }
   }
 
-  // broadcast expectation values
-  MPI_Bcast(globalExpVals.data(), globalExpVals.size(), MPI_DOUBLE, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
-  // broadcast size of each key
-  MPI_Bcast(globalKeySizes.data(), globalKeySizes.size(), MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  // broadcast size of each key
+  qpuComm->broadcast(globalKeySizes);
+
+  // broadcast results
+  if (isVqeMode) {
+    // broadcast expectation values
+    qpuComm->broadcast(globalExpVals);
+  } else {
+    // broadcast number of bit strings
+    qpuComm->broadcast(globalNumberBitStrings);
+  }
 
   // get the size of all keys
   auto nGlobalKeyChars =
       std::accumulate(globalKeySizes.begin(), globalKeySizes.end(), 0);
 
+  // get total number of measured bitstrings
+  auto nGlobalBitStrings = std::accumulate(globalNumberBitStrings.begin(),
+                                           globalNumberBitStrings.end(), 0);
+
   // gather all keys chars
   std::vector<char> globalKeyChars(nGlobalKeyChars);
+  std::vector<int> globalBitStrings, globalCounts;
+  if (!isVqeMode) {
+    globalBitStrings.resize(nGlobalBitStrings);
+    globalCounts.resize(nGlobalBitStrings);
+  }
 
   if (world_rank == qpuComm->getProcessRanks()[0]) {
 
     // get local key char arrays
+    // and local bitstrings and counts
     std::vector<char> localKeys;
+    std::vector<int> localBitStringIndices, localCounts;
     for (auto child : my_buffer->getChildren()) {
+
       for (auto c : child->name()) {
         localKeys.push_back(c);
       }
+
+      // get bitstring decimals and counts
+      if (!isVqeMode) {
+        for (auto &count : child->getMeasurementCounts()) {
+          auto bitString = count.first;
+          // stoi treats the leftmost character as the most significant bit,
+          // so reverse LSB-ordered keys before converting
+          if (decoratedAccelerator->getBitOrder() ==
+              Accelerator::BitOrder::LSB) {
+            std::reverse(bitString.begin(), bitString.end());
+          }
+          auto index = std::stoi(bitString, nullptr, 2);
+          localBitStringIndices.push_back(index);
+          localCounts.push_back(count.second);
+        }
+      }
     }
 
     // get the size of keys in the communicator
@@ -227,18 +319,58 @@ void HPCVirtDecorator::execute(
     }
 
     // gather all key chars
-    MPI_Allgatherv(localKeys.data(), localKeys.size(), MPI_CHAR,
-                   globalKeyChars.data(), commKeySize.data(),
-                   keySizeShift.data(), MPI_CHAR,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
+    zeroRanksComm->allGatherv(localKeys, globalKeyChars, commKeySize,
+                              keySizeShift);
+
+    if (!isVqeMode) {
+
+      // get number of bit strings in the communicator
+      std::vector<int> commNumberBitStrings(n_virtual_qpus);
+      shift = 0;
+      for (int i = 0; i < n_virtual_qpus; i++) {
+        auto it = globalNumberBitStrings.begin() + shift;
+        commNumberBitStrings[i] =
+            std::accumulate(it, it + nLocalChildren[i], 0);
+        shift += nLocalChildren[i];
+      }
+
+      // shifts for bit strings
+      std::vector<int> bitStringShift(n_virtual_qpus);
+      for (int i = 1; i < n_virtual_qpus; i++) {
+        bitStringShift[i] = std::accumulate(
+            commNumberBitStrings.begin(), commNumberBitStrings.begin() + i, 0);
+      }
+
+      // gather all bit strings
+      zeroRanksComm->allGatherv(localBitStringIndices, globalBitStrings,
+                                commNumberBitStrings, bitStringShift);
+      // gather all counts
+      zeroRanksComm->allGatherv(localCounts, globalCounts,
+                                commNumberBitStrings, bitStringShift);
+    }
   }
 
   // broadcast all keys
-  MPI_Bcast(globalKeyChars.data(), globalKeyChars.size(), MPI_CHAR, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(globalKeyChars);
+
+  if (!isVqeMode) {
+    // broadcast indices
+    qpuComm->broadcast(globalBitStrings);
+    // broadcast counts
+    qpuComm->broadcast(globalCounts);
+  }
+
+  // get binary from decimal (LSB-first digits; reversed to MSB below)
+  const auto getBinary = [=](int decimal) {
+    std::string s;
+    do {
+      s += (decimal % 2 == 0 ? "0" : "1");
+      decimal /= 2;
+    } while (decimal != 0);
+    return s;
+  };
 
   // now every process has everything to rebuild the buffer
-  int shift = 0;
+  int shift = 0, countShift = 0;
   for (int i = 0; i < nGlobalChildren; i++) {
 
     // get child name
@@ -248,7 +380,33 @@ void HPCVirtDecorator::execute(
 
     // create child buffer and append it to buffer
     auto child = xacc::qalloc(buffer->size());
     child->setName(name);
-    child->addExtraInfo("exp-val-z", globalExpVals[i]);
+
+    if (isVqeMode) {
+
+      child->addExtraInfo("exp-val-z", globalExpVals[i]);
+
+    } else {
+
+      auto nChildBitStrings = globalNumberBitStrings[i];
+      for (int b = 0; b < nChildBitStrings; b++) {
+
+        auto counts = globalCounts[b + countShift];
+        auto bitStringDecimal = globalBitStrings[b + countShift];
+        auto bitString = getBinary(bitStringDecimal);
+        // check if we need to pad zeros
+        auto nMeasuredBits =
+            std::count_if(name.begin(), name.end(), [](char c) {
+              return std::string("XYZ").find(c) != std::string::npos;
+            });
+        if (nMeasuredBits > bitString.size()) {
+          bitString += std::string(nMeasuredBits - bitString.size(), '0');
+        }
+        std::reverse(bitString.begin(), bitString.end());
+        child->appendMeasurement(bitString, counts);
+      }
+      countShift += nChildBitStrings;
+    }
+
     buffer->appendChild(name, child);
     shift += globalKeySizes[i];
   }
@@ -261,6 +419,38 @@ void HPCVirtDecorator::execute(
 
   return;
 }
 
+void HPCVirtDecorator::finalize() {
+  if (qpuComm) {
+    // Make sure we explicitly release this so that the MPICommProxy is
+    // destroyed before framework shutdown (MPI_Finalize, if needed)
+    qpuComm.reset();
+  }
+}
+
+class HPCVirtTearDown : public xacc::TearDown {
+public:
+  virtual void tearDown() override {
+    auto c = xacc::getService<Accelerator>("hpc-virtualization", false);
+    if (c) {
+      auto casted = std::dynamic_pointer_cast<HPCVirtDecorator>(c);
+      assert(casted);
+      casted->finalize();
+    }
+
+    int finalized, initialized;
+    MPI_Initialized(&initialized);
+    if (initialized) {
+      MPI_Finalized(&finalized);
+      if (!finalized && hpcVirtDecoratorInitializedMpi) {
+        MPI_Finalize();
+      }
+    }
+  }
+  virtual std::string name() const override { return "xacc-hpc-virt"; }
+};
+
 } // namespace quantum
 } // namespace xacc
 
@@ -282,6 +472,8 @@ class US_ABI_LOCAL HPCVirtActivator : public BundleActivator {
 
     context.RegisterService<xacc::AcceleratorDecorator>(c);
     context.RegisterService<xacc::Accelerator>(c);
+    context.RegisterService<xacc::TearDown>(
+        std::make_shared<xacc::quantum::HPCVirtTearDown>());
   }
 
   /**
diff --git a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp
index 691597b90..4a12fd811 100644
--- a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp
+++ b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp
@@ -14,7 +14,6 @@
 #ifndef XACC_HPC_VIRT_DECORATOR_HPP_
 #define XACC_HPC_VIRT_DECORATOR_HPP_
 
-#include "mpi.h"
 #include "xacc.hpp"
 #include "MPIProxy.hpp"
 #include "AcceleratorDecorator.hpp"
@@ -26,7 +25,8 @@ namespace quantum {
 
 class HPCVirtDecorator : public AcceleratorDecorator {
 protected:
-  int n_virtual_qpus = 1;
+  bool isVqeMode = false;
+  int n_virtual_qpus = 1, shots = -1;
 
   // The MPI communicator for each QPU
   std::shared_ptr<ProcessGroup> qpuComm;
@@ -45,31 +45,30 @@ class HPCVirtDecorator : public AcceleratorDecorator {
 
   const std::string name() const override { return "hpc-virtualization"; }
   const std::string description() const override { return ""; }
+  void finalize();
 
-  ~HPCVirtDecorator() override { }
+  ~HPCVirtDecorator() override { };
 
 private:
-  template <typename T>
-  std::vector<std::vector<T>> split_vector(const std::vector<T> &vec,
-                                           size_t n) {
-    std::vector<std::vector<T>> outVec;
-    size_t length =
-        vec.size() / n;
-    size_t remain = vec.size() % n;
-
-    size_t begin = 0;
-    size_t end = 0;
-
-    for (size_t i = 0; i < std::min(n, vec.size()); ++i) {
-      end += (remain > 0) ? (length + !!(remain--)) : length;
-
-      outVec.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
-
-      begin = end;
+  template <typename T>
+  std::vector<std::vector<T>>
+  split_vector(const std::vector<T> &inputVector, size_t numSegments) {
+    std::vector<std::vector<T>> result;
+
+    size_t inputSize = inputVector.size();
+    // ceiling division
+    size_t segmentSize = (inputSize + numSegments - 1) / numSegments;
+
+    auto begin = inputVector.begin();
+    auto end = inputVector.end();
+
+    for (size_t i = 0; i < numSegments; ++i) {
+      auto segmentEnd = std::next(
+          begin, std::min(segmentSize,
+                          static_cast<size_t>(std::distance(begin, end))));
+      result.emplace_back(begin, segmentEnd);
+      begin = segmentEnd;
     }
 
-    return outVec;
+    return result;
   }
 };
 
 } // namespace quantum
diff --git a/xacc/CMakeLists.txt b/xacc/CMakeLists.txt
index 40f827085..d6cdbe1ff 100644
--- a/xacc/CMakeLists.txt
+++ b/xacc/CMakeLists.txt
@@ -85,17 +85,14 @@ message(
                              ${NLOHMANN_INCLUDE_DIR}
                              ${SPDLOG_INCLUDE_DIR})
 
-  # linking against MPI libraries found by cmake
-  if(MPI_FOUND)
-    target_link_libraries(xacc
-                          PUBLIC CppMicroServices ${MPI_CXX_LIBRARIES}
-                          PRIVATE cpr ${LIBUNWIND_LIBRARIES}
-                                  ${LIBUNWINDX86_LIBRARIES})
-  else()
-    target_link_libraries(xacc
+  target_link_libraries(xacc
                         PUBLIC CppMicroServices
-                        PRIVATE cpr ${LIBUNWIND_LIBRARIES}
+                        PRIVATE cpr
+                                ${LIBUNWIND_LIBRARIES}
                                 ${LIBUNWINDX86_LIBRARIES})
+  # linking against MPI libraries found by cmake
+  if(MPI_FOUND)
+    target_link_libraries(xacc PUBLIC CppMicroServices ${MPI_CXX_LIBRARIES})
   endif()
 
 else()
@@ -119,11 +116,10 @@ else()
                              ${NLOHMANN_INCLUDE_DIR}
                              ${SPDLOG_INCLUDE_DIR})
 
+  target_link_libraries(xacc PUBLIC CppMicroServices PRIVATE cpr)
   # linking against MPI libraries found by cmake
   if(MPI_FOUND)
-    target_link_libraries(xacc PUBLIC CppMicroServices ${MPI_CXX_LIBRARIES} PRIVATE cpr)
-  else()
-    target_link_libraries(xacc PUBLIC CppMicroServices PRIVATE cpr)
+    target_link_libraries(xacc PUBLIC ${MPI_CXX_LIBRARIES})
   endif()
 
 endif()
diff --git a/xacc/xacc.cpp b/xacc/xacc.cpp
index 89a9cabce..c67fa1b7c 100644
--- a/xacc/xacc.cpp
+++ b/xacc/xacc.cpp
@@ -30,10 +30,6 @@
 #include
 #include "TearDown.hpp"
 
-#ifdef MPI_ENABLED
-#include "mpi.h"
-#endif
-
 using namespace cxxopts;
 
 namespace xacc {
@@ -51,11 +47,6 @@ std::map<std::string, std::shared_ptr<CompositeInstruction>>
 std::map<std::string, std::shared_ptr<AcceleratorBuffer>>
     allocated_buffers{};
 std::string rootPathString = "";
-
-#ifdef MPI_ENABLED
-int isMPIInitialized;
-#endif
-
 void set_verbose(bool v) { verbose = v; }
 
 int getArgc() { return argc; }
@@ -116,18 +107,6 @@ void Initialize(int arc, char **arv) {
     XACCLogger::instance()->dumpQueue();
   }
 
-  // Initializing MPI here
-#ifdef MPI_ENABLED
-  int provided;
-  MPI_Initialized(&isMPIInitialized);
-  if (!isMPIInitialized) {
-    MPI_Init_thread(0, NULL, MPI_THREAD_MULTIPLE, &provided);
-    if (provided != MPI_THREAD_MULTIPLE) {
-      xacc::warning("MPI_THREAD_MULTIPLE not provided.");
-    }
-    isMPIInitialized = 1;
-  }
-#endif
 }
 
 void setIsPyApi() { isPyApi = true; }
@@ -869,12 +848,6 @@ void Finalize() {
   compilation_database.clear();
   allocated_buffers.clear();
   xacc::ServiceAPI_Finalize();
-  // This replaces the HPC virtualization TearDown
-#ifdef MPI_ENABLED
-  if (isMPIInitialized) {
-    MPI_Finalize();
-  }
-#endif
 }
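
Note (reviewer sketch, not part of the patch): the decorator ships measurement results between ranks by converting each bitstring key to a decimal index before the Allgatherv, then rebuilding the key on every rank with the getBinary lambda, zero padding up to the number of measured qubits, and a final reversal. The standalone snippet below mirrors that round trip under those assumptions; encodeKey, decodeKey, and the example key are illustrative names only, not symbols from the patch.

#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>

// Encode an MSB-ordered bitstring key as a decimal index (what std::stoi does in the patch).
int encodeKey(const std::string &msbKey) { return std::stoi(msbKey, nullptr, 2); }

// Decode mirrors the patch's getBinary lambda plus the padding/reversal steps:
// emit LSB-first digits, pad with '0' up to nMeasuredBits, then reverse to MSB order.
std::string decodeKey(int decimal, std::size_t nMeasuredBits) {
  std::string s;
  do {
    s += (decimal % 2 == 0 ? '0' : '1');
    decimal /= 2;
  } while (decimal != 0);
  if (nMeasuredBits > s.size()) {
    s += std::string(nMeasuredBits - s.size(), '0');
  }
  std::reverse(s.begin(), s.end());
  return s;
}

int main() {
  const std::string key = "00101"; // 5 measured qubits, MSB-ordered
  int index = encodeKey(key);      // 5
  std::string rebuilt = decodeKey(index, key.size());
  std::cout << key << " -> " << index << " -> " << rebuilt << "\n";
  assert(rebuilt == key); // leading zeros are restored by the padding step
  return 0;
}

This round trip is what lets measurement counts travel as plain int pairs through the new allGatherv/broadcast wrappers instead of variable-length strings.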