From c7171ffe42af3e21de802ef669cd7aae7413fee5 Mon Sep 17 00:00:00 2001 From: Sergei Isakov Date: Tue, 30 Jun 2020 23:57:46 +0200 Subject: [PATCH 1/3] Add measurement gate. --- apps/qsim_amplitudes.cc | 10 +- apps/qsim_base.cc | 11 +- apps/qsim_von_neumann.cc | 14 ++- docs/input_format.md | 4 + lib/BUILD | 7 +- lib/circuit_qsim_parser.h | 36 ++++++ lib/fuser.h | 2 +- lib/fuser_basic.h | 101 ++++++++++++++-- lib/gate.h | 28 +++++ lib/gate_appl.h | 15 +-- lib/gates_cirq.h | 3 +- lib/gates_qsim.h | 3 +- lib/hybrid.h | 13 +- lib/parfor.h | 54 +++++++-- lib/run_qsim.h | 39 +++++- lib/seqfor.h | 30 ++++- lib/statespace.h | 84 +++++++++++-- lib/statespace_avx.h | 172 ++++++++++++++++++++------- lib/statespace_basic.h | 119 +++++++++++++++---- lib/statespace_sse.h | 168 ++++++++++++++++++++------ lib/util.h | 12 +- pybind_interface/pybind_main.h | 1 - tests/circuit_qsim_parser_test.cc | 102 ++++++++++------ tests/fuser_basic_test.cc | 174 ++++++++++++++++++++++++++- tests/run_qsim_test.cc | 11 +- tests/statespace_avx_test.cc | 8 ++ tests/statespace_basic_test.cc | 8 ++ tests/statespace_sse_test.cc | 8 ++ tests/statespace_testfixture.h | 191 ++++++++++++++++++++++++++++-- 29 files changed, 1202 insertions(+), 226 deletions(-) diff --git a/apps/qsim_amplitudes.cc b/apps/qsim_amplitudes.cc index 70851c13..3d0221ee 100644 --- a/apps/qsim_amplitudes.cc +++ b/apps/qsim_amplitudes.cc @@ -33,13 +33,15 @@ constexpr char usage[] = "usage:\n ./qsim_amplitudes -c circuit_file " "-d times_to_save_results -i input_files " - "-o output_files -t num_threads -v verbosity\n"; + "-o output_files -s seed -t num_threads " + "-v verbosity\n"; struct Options { std::string circuit_file; std::vector times = {std::numeric_limits::max()}; std::vector input_files; std::vector output_files; + unsigned seed = 1; unsigned num_threads = 1; unsigned verbosity = 0; }; @@ -53,7 +55,7 @@ Options GetOptions(int argc, char* argv[]) { return std::atoi(word.c_str()); }; - while ((k = getopt(argc, argv, "c:d:i:o:t:v:")) != -1) { + while ((k = getopt(argc, argv, "c:d:i:s:o:t:v:")) != -1) { switch (k) { case 'c': opt.circuit_file = optarg; @@ -67,6 +69,9 @@ Options GetOptions(int argc, char* argv[]) { case 'o': qsim::SplitString(optarg, ',', opt.output_files); break; + case 's': + opt.seed = std::atoi(optarg); + break; case 't': opt.num_threads = std::atoi(optarg); break; @@ -170,6 +175,7 @@ int main(int argc, char* argv[]) { using Runner = QSimRunner>, Simulator>; Runner::Parameter param; + param.seed = opt.seed; param.num_threads = opt.num_threads; param.verbosity = opt.verbosity; Runner::Run(param, opt.times, circuit, measure); diff --git a/apps/qsim_base.cc b/apps/qsim_base.cc index 17ef2e6a..444da62a 100644 --- a/apps/qsim_base.cc +++ b/apps/qsim_base.cc @@ -30,19 +30,20 @@ struct Options { std::string circuit_file; unsigned maxtime = std::numeric_limits::max(); + unsigned seed = 1; unsigned num_threads = 1; unsigned verbosity = 0; }; Options GetOptions(int argc, char* argv[]) { constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime " - "-t threads -v verbosity\n"; + "-s seed -t threads -v verbosity\n"; Options opt; int k; - while ((k = getopt(argc, argv, "c:d:t:v:")) != -1) { + while ((k = getopt(argc, argv, "c:d:s:t:v:")) != -1) { switch (k) { case 'c': opt.circuit_file = optarg; @@ -50,6 +51,9 @@ Options GetOptions(int argc, char* argv[]) { case 'd': opt.maxtime = std::atoi(optarg); break; + case 's': + opt.seed = std::atoi(optarg); + break; case 't': opt.num_threads = std::atoi(optarg); break; @@ -81,7 +85,7 @@ void PrintAmplitudes( "000", "001", "010", "011", "100", "101", "110", "111", }; - uint64_t size = std::min(uint64_t{8}, state_space.Size(state)); + uint64_t size = std::min(uint64_t{8}, state_space.Size()); unsigned s = 3 - std::min(unsigned{3}, num_qubits); for (uint64_t i = 0; i < size; ++i) { @@ -121,6 +125,7 @@ int main(int argc, char* argv[]) { state_space.SetStateZero(state); Runner::Parameter param; + param.seed = opt.seed; param.num_threads = opt.num_threads; param.verbosity = opt.verbosity; diff --git a/apps/qsim_von_neumann.cc b/apps/qsim_von_neumann.cc index 96d0c917..cefec425 100644 --- a/apps/qsim_von_neumann.cc +++ b/apps/qsim_von_neumann.cc @@ -32,19 +32,20 @@ struct Options { std::string circuit_file; unsigned maxtime = std::numeric_limits::max(); + unsigned seed = 1; unsigned num_threads = 1; unsigned verbosity = 0; }; Options GetOptions(int argc, char* argv[]) { constexpr char usage[] = "usage:\n ./qsim_von_neumann -c circuit -d maxtime " - "-t threads -v verbosity\n"; + "-s seed -t threads -v verbosity\n"; Options opt; int k; - while ((k = getopt(argc, argv, "c:d:t:v:")) != -1) { + while ((k = getopt(argc, argv, "c:d:s:t:v:")) != -1) { switch (k) { case 'c': opt.circuit_file = optarg; @@ -52,6 +53,9 @@ Options GetOptions(int argc, char* argv[]) { case 'd': opt.maxtime = std::atoi(optarg); break; + case 's': + opt.seed = std::atoi(optarg); + break; case 't': opt.num_threads = std::atoi(optarg); break; @@ -104,15 +108,15 @@ int main(int argc, char* argv[]) { return p != 0 ? p * std::log(p) : 0; }; - auto size = state_space.Size(state); - double entropy = -For::RunReduce(opt.num_threads, size, f, Op(), - state_space, state); + double entropy = -For::RunReduce(opt.num_threads, state_space.Size(), f, + Op(), state_space, state); IO::messagef("entropy=%g\n", entropy); }; using Runner = QSimRunner>, Simulator>; Runner::Parameter param; + param.seed = opt.seed; param.num_threads = opt.num_threads; param.verbosity = opt.verbosity; diff --git a/docs/input_format.md b/docs/input_format.md index b1f8aeaf..7fab3eed 100644 --- a/docs/input_format.md +++ b/docs/input_format.md @@ -41,3 +41,7 @@ fSim(θ, φ) | time fs qubi CPhase(φ) | time cp qubit1 qubit2 phi | 6 cp 0 1 0.78 Identity (1-qubit) | time id1 qubit | 7 id1 0 Identity (2-qubit) | time id2 qubit | 8 id2 0 1 +Measurement (n-qubit) | time m qubit1 qubit2 ... | 9 m 0 1 2 3 + +Gate times should be ordered. Measurement gates with equal times get fused +together. diff --git a/lib/BUILD b/lib/BUILD index 6904ca7b..4fdcaa2f 100644 --- a/lib/BUILD +++ b/lib/BUILD @@ -207,7 +207,10 @@ cc_library( cc_library( name = "fuser_basic", hdrs = ["fuser_basic.h"], - deps = [":fuser"], + deps = [ + ":fuser", + ":gate", + ], ) ### Helper libraries to run qsim and qsimh ### @@ -216,6 +219,7 @@ cc_library( name = "run_qsim", hdrs = ["run_qsim.h"], deps = [ + ":gate", ":gate_appl", ":util", ], @@ -235,6 +239,7 @@ cc_library( cc_library( name = "statespace", hdrs = ["statespace.h"], + deps = [":util"], ) cc_library( diff --git a/lib/circuit_qsim_parser.h b/lib/circuit_qsim_parser.h index 59afc735..40630e29 100644 --- a/lib/circuit_qsim_parser.h +++ b/lib/circuit_qsim_parser.h @@ -169,6 +169,21 @@ class CircuitQsimParser final { ss >> q0 >> q1 >> phi; if (!ValidateGate(ss, num_qubits, q0, q1, provider, k)) return false; gates.push_back(GateCP::Create(time, q0, q1, phi)); + } else if (gate_name == "m") { + std::vector qubits; + qubits.reserve(num_qubits); + do { + ss >> q0; + if (ss) { + qubits.push_back(q0); + } else { + InvalidGateError(provider, k); + return false; + } + } while (ss.peek() != std::stringstream::traits_type::eof()); + if (!ValidateGate(ss, num_qubits, qubits, provider, k)) return false; + gates.push_back(gate::Measurement>::Create( + time, std::move(qubits))); } else { InvalidGateError(provider, k); return false; @@ -247,6 +262,27 @@ class CircuitQsimParser final { return true; } + + /** + * Checks formatting for a multiqubit gate parsed from 'ss'. + * @param ss Input stream containing the gate specification. + * @param num_qubits Number of qubits, as defined at the start of the file. + * @param qubits Indices of affected qubits. + * @param provider Circuit source; only used for error reporting. + * @param line Line number of the parsed gate; only used for error reporting. + */ + static bool ValidateGate(std::stringstream& ss, unsigned num_qubits, + const std::vector& qubits, + const std::string& provider, unsigned line) { + for (auto q : qubits) { + if (q >= num_qubits) { + InvalidGateError(provider, line); + return false; + } + } + + return true; + } }; } // namespace qsim diff --git a/lib/fuser.h b/lib/fuser.h index c829a3d6..78e32b6c 100644 --- a/lib/fuser.h +++ b/lib/fuser.h @@ -24,7 +24,7 @@ struct GateFused { typename Gate::GateKind kind; // Kind of the (first) master gate. unsigned time; unsigned num_qubits; - unsigned qubits[3]; + std::vector qubits; const Gate* pmaster; std::vector gates; }; diff --git a/lib/fuser_basic.h b/lib/fuser_basic.h index a91a419c..db4e4553 100644 --- a/lib/fuser_basic.h +++ b/lib/fuser_basic.h @@ -15,9 +15,11 @@ #ifndef FUSER_BASIC_H_ #define FUSER_BASIC_H_ +#include #include #include +#include "gate.h" #include "fuser.h" namespace qsim { @@ -49,7 +51,7 @@ struct BasicGateFuser final { * applied together. Note that gates fused with this method are not * multiplied together until ApplyFusedGate is called on the output. * @param num_qubits The number of qubits acted on by 'gates'. - * @param gates The gates to be fused. + * @param gates The gates to be fused. Gate times should be ordered. * @param times_to_split_at Ordered list of time steps at which to separate * fused gates. Each element of the output will contain gates from a single * 'window' in this list. @@ -61,7 +63,17 @@ struct BasicGateFuser final { const std::vector& times_to_split_at) { std::vector gates_fused; - // Sequence of two-qubit gates and fixed single-qubit gates. + if (gates.size() == 0) return gates_fused; + + gates_fused.reserve(gates.size()); + + // Merge with measurement gate times to separate fused gates at. + auto times = MergeWithMeasurementTimes(gates, times_to_split_at); + + // Map to keep track of measurement gates with equal times. + std::map> measurement_gates; + + // Sequence of top level gates the other gates get fused to. std::vector gates_seq; // Lattice of gates: qubits "hyperplane" and time direction. @@ -70,7 +82,9 @@ struct BasicGateFuser final { // Current unfused gate index. std::size_t gate_index = 0; - for (std::size_t l = 0; l < times_to_split_at.size(); ++l) { + for (std::size_t l = 0; l < times.size(); ++l) { + if (gate_index == gates.size()) break; + gates_seq.resize(0); gates_seq.reserve(gates.size()); @@ -79,13 +93,33 @@ struct BasicGateFuser final { gates_lat[k].reserve(128); } + auto prev_time = gates[gate_index].time; + // Fill gates_seq and gates_lat in. for (; gate_index < gates.size(); ++gate_index) { const auto& gate = gates[gate_index]; - if (gates[gate_index].time > times_to_split_at[l]) break; + if (gate.time > times[l]) break; + + if (gate.time < prev_time) { + // This function assumes that gate times are ordered. + // Just stop sielently if this is not the case. + // TODO: report an error here. + gates_fused.resize(0); + return gates_fused; + } + + prev_time = gate.time; - if (gate.num_qubits == 1) { + if (gate.kind == gate::kMeasurement) { + auto& mea_gates_at_time = measurement_gates[gate.time]; + if (mea_gates_at_time.size() == 0) { + gates_seq.push_back(&gate); + mea_gates_at_time.reserve(num_qubits); + } + + mea_gates_at_time.push_back(&gate); + } else if (gate.num_qubits == 1) { gates_lat[gate.qubits[0]].push_back(&gate); if (gate.unfusible) { gates_seq.push_back(&gate); @@ -99,9 +133,13 @@ struct BasicGateFuser final { std::vector last(num_qubits, 0); + const Gate* delayed_measurement_gate = nullptr; + // Fuse gates. for (auto pgate : gates_seq) { - if (pgate->num_qubits == 1) { + if (pgate->kind == gate::kMeasurement) { + delayed_measurement_gate = pgate; + } else if (pgate->num_qubits == 1) { unsigned q0 = pgate->qubits[0]; GateFused gate_f = {pgate->kind, pgate->time, 1, {q0}, pgate, {}}; @@ -150,17 +188,66 @@ struct BasicGateFuser final { gates_fused.push_back(std::move(gate_f)); } + + if (delayed_measurement_gate != nullptr) { + auto pgate = delayed_measurement_gate; + + const auto& mea_gates_at_time = measurement_gates[pgate->time]; + + GateFused gate_f = {pgate->kind, pgate->time, 0, {}, pgate, {}}; + + // Fuse measurement gates with equal times. + + for (const auto* pgate : mea_gates_at_time) { + gate_f.num_qubits += pgate->num_qubits; + gate_f.qubits.insert(gate_f.qubits.end(), + pgate->qubits.begin(), pgate->qubits.end()); + } + + gates_fused.push_back(std::move(gate_f)); + } } return gates_fused; } private: + static std::vector MergeWithMeasurementTimes( + const std::vector& gates, const std::vector& times) { + std::vector times2; + times2.reserve(gates.size() + times.size()); + + std::size_t last = 0; + + for (const auto& gate : gates) { + if (gate.kind == gate::kMeasurement && times2.back() < gate.time) { + times2.push_back(gate.time); + } + + if (last < times.size() && gate.time > times[last]) { + while (last < times.size() && times[last] <= gate.time) { + unsigned prev = times[last++]; + times2.push_back(prev); + while (last < times.size() && times[last] <= prev) ++last; + } + } + + if (last == times.size()) break; + } + + if (last < times.size()) { + times2.push_back(times[last]); + } + + return times2; + } + static unsigned Advance(unsigned k, const std::vector& wl, - std::vector& gates) { + std::vector& gates) { while (k < wl.size() && wl[k]->num_qubits == 1 && !wl[k]->unfusible) { gates.push_back(wl[k++]); } + return k; } diff --git a/lib/gate.h b/lib/gate.h index e9b966fc..54f189c6 100644 --- a/lib/gate.h +++ b/lib/gate.h @@ -65,6 +65,15 @@ inline Gate CreateGate(unsigned time, unsigned q0, unsigned q1, return gate; } +template +inline Gate CreateGate(unsigned time, std::vector&& qubits, + std::vector&& matrix = {}, + std::vector&& params = {}) { + return Gate{GateDef::kind, time, static_cast(qubits.size()), + std::move(qubits), std::move(params), std::move(matrix), + false, false}; +} + template using schmidt_decomp_type = std::vector>>; @@ -72,6 +81,25 @@ template schmidt_decomp_type GetSchmidtDecomp( GateKind kind, const std::vector& params); +namespace gate { + +constexpr int kDecomp = 100001; // gate from Schmidt decomposition +constexpr int kMeasurement = 100002; // measurement gate + +template +struct Measurement { + using GateKind = typename Gate::GateKind; + + static constexpr GateKind kind = GateKind::kMeasurement; + static constexpr char name[] = "m"; + + static Gate Create(unsigned time, std::vector&& qubits) { + return CreateGate(time, std::move(qubits)); + } +}; + +} // namespace gate + } // namespace qsim #endif // GATE_H_ diff --git a/lib/gate_appl.h b/lib/gate_appl.h index 8caf5dd8..f2410e12 100644 --- a/lib/gate_appl.h +++ b/lib/gate_appl.h @@ -73,10 +73,10 @@ inline void ApplyGate( const Simulator& simulator, const Gate& gate, State& state) { typename Simulator::fp_type matrix[32]; - if (gate.num_qubits == 1) { + if (gate.num_qubits == 1 && gate.matrix.size() == 8) { std::copy(gate.matrix.begin(), gate.matrix.begin() + 8, matrix); simulator.ApplyGate1(gate.qubits[0], matrix, state); - } else if (gate.num_qubits == 2) { + } else if (gate.num_qubits == 2 && gate.matrix.size() == 32) { std::copy(gate.matrix.begin(), gate.matrix.begin() + 32, matrix); // Here we should have gate.qubits[0] < gate.qubits[1]. @@ -96,16 +96,13 @@ inline void ApplyFusedGate( const Simulator& simulator, const Gate& gate, State& state) { typename Simulator::fp_type matrix[32]; - if (gate.num_qubits == 1) { + if (gate.num_qubits == 1 && gate.pmaster->matrix.size() == 8) { CalcMatrix2(gate.gates, matrix); simulator.ApplyGate1(gate.qubits[0], matrix, state); - } else if (gate.num_qubits == 2) { + } else if (gate.num_qubits == 2 && gate.pmaster->matrix.size() == 32) { // Here we should have gate.qubits[0] < gate.qubits[1]. - unsigned q0 = gate.qubits[0]; - unsigned q1 = gate.qubits[1]; - - CalcMatrix4(q0, q1, gate.gates, matrix); - simulator.ApplyGate2(q0, q1, matrix, state); + CalcMatrix4(gate.qubits[0], gate.qubits[1], gate.gates, matrix); + simulator.ApplyGate2(gate.qubits[0], gate.qubits[1], matrix, state); } } diff --git a/lib/gates_cirq.h b/lib/gates_cirq.h index 72959c9a..75061cef 100644 --- a/lib/gates_cirq.h +++ b/lib/gates_cirq.h @@ -64,7 +64,8 @@ enum GateKind { kFSimGate, kMatrixGate1, // One-qubit matrix gate. kMatrixGate2, // Two-qubit matrix gate. - kGateDecomp, + kDecomp = gate::kDecomp, + kMeasurement = gate::kMeasurement, }; template diff --git a/lib/gates_qsim.h b/lib/gates_qsim.h index b3903939..63d09786 100644 --- a/lib/gates_qsim.h +++ b/lib/gates_qsim.h @@ -46,7 +46,8 @@ enum GateKind { kGateIS, // iSwap kGateFS, // fSim kGateCP, // control phase - kGateDecomp, // single qubit gate from Schmidt decomposition + kDecomp = gate::kDecomp, + kMeasurement = gate::kMeasurement, }; // Specialization of Gate (defined in gate.h) for the qsim gate set. diff --git a/lib/hybrid.h b/lib/hybrid.h index 4910582c..1bd7fdf4 100644 --- a/lib/hybrid.h +++ b/lib/hybrid.h @@ -114,6 +114,11 @@ struct HybridSimulator final { // Split the lattice. for (const auto& gate : gates) { + if (gate.kind == gate::kMeasurement) { + IO::errorf("measurement gates are not suported by qsimh.\n"); + return false; + } + switch (gate.num_qubits) { case 1: // Single qubit gates. switch (parts[gate.qubits[0]]) { @@ -138,20 +143,20 @@ struct HybridSimulator final { gate.params, gate.matrix, false, gate.inverse, nullptr, 0}); break; case 1: // Gate on the cut, qubit 0 in part 1, qubit 1 in part 0. - hd.gates0.emplace_back(GateHybrid{GateKind::kGateDecomp, gate.time, + hd.gates0.emplace_back(GateHybrid{GateKind::kDecomp, gate.time, 1, {hd.qubit_map[gate.qubits[1]]}, gate.params, {}, true, gate.inverse, &gate, hd.num_gatexs}); - hd.gates1.emplace_back(GateHybrid{GateKind::kGateDecomp, gate.time, + hd.gates1.emplace_back(GateHybrid{GateKind::kDecomp, gate.time, 1, {hd.qubit_map[gate.qubits[0]]}, gate.params, {}, true, gate.inverse, &gate, hd.num_gatexs}); ++hd.num_gatexs; break; case 2: // Gate on the cut, qubit 0 in part 0, qubit 1 in part 1. - hd.gates0.emplace_back(GateHybrid{GateKind::kGateDecomp, gate.time, + hd.gates0.emplace_back(GateHybrid{GateKind::kDecomp, gate.time, 1, {hd.qubit_map[gate.qubits[0]]}, gate.params, {}, true, gate.inverse, &gate, hd.num_gatexs}); - hd.gates1.emplace_back(GateHybrid{GateKind::kGateDecomp, gate.time, + hd.gates1.emplace_back(GateHybrid{GateKind::kDecomp, gate.time, 1, {hd.qubit_map[gate.qubits[1]]}, gate.params, {}, true, gate.inverse, &gate, hd.num_gatexs}); diff --git a/lib/parfor.h b/lib/parfor.h index dd130918..cb62d0fa 100644 --- a/lib/parfor.h +++ b/lib/parfor.h @@ -24,6 +24,20 @@ namespace qsim { template struct ParallelForT { + static bool CanBeParallel(uint64_t size) { + return size >= MIN_SIZE; + } + + static uint64_t GetIndex0( + uint64_t size, unsigned num_threads, unsigned thread_id) { + return size >= MIN_SIZE ? size * thread_id / num_threads : 0; + } + + static uint64_t GetIndex1( + uint64_t size, unsigned num_threads, unsigned thread_id) { + return size >= MIN_SIZE ? size * (thread_id + 1) / num_threads : size; + } + template static void Run( unsigned num_threads, uint64_t size, Function&& func, Args&&... args) { @@ -33,8 +47,8 @@ struct ParallelForT { unsigned n = omp_get_num_threads(); unsigned m = omp_get_thread_num(); - uint64_t i0 = size * m / n; - uint64_t i1 = size * (m + 1) / n; + uint64_t i0 = GetIndex0(size, n, m); + uint64_t i1 = GetIndex1(size, n, m); for (uint64_t i = i0; i < i1; ++i) { func(n, m, i, args...); @@ -48,21 +62,21 @@ struct ParallelForT { } template - static typename Op::result_type RunReduce(unsigned num_threads, - uint64_t size, Function&& func, - Op&& op, Args&&... args) { - typename Op::result_type result = 0; + static std::vector RunReduceP( + unsigned num_threads, uint64_t size, Function&& func, const Op& op, + Args&&... args) { + std::vector partial_results; if (num_threads > 1 && size >= MIN_SIZE) { - std::vector partial_results(num_threads, 0); + partial_results.resize(num_threads, 0); #pragma omp parallel num_threads(num_threads) { unsigned n = omp_get_num_threads(); unsigned m = omp_get_thread_num(); - uint64_t i0 = size * m / n; - uint64_t i1 = size * (m + 1) / n; + uint64_t i0 = GetIndex0(size, n, m); + uint64_t i1 = GetIndex1(size, n, m); typename Op::result_type partial_result = 0; @@ -72,14 +86,28 @@ struct ParallelForT { partial_results[m] = partial_result; } - - for (unsigned i = 0; i < num_threads; ++i) { - result = op(result, partial_results[i]); - } } else if (num_threads > 0) { + typename Op::result_type result = 0; for (uint64_t i = 0; i < size; ++i) { result = op(result, func(1, 0, i, args...)); } + + partial_results.resize(1, result); + } + + return partial_results; + } + + template + static typename Op::result_type RunReduce(unsigned num_threads, + uint64_t size, Function&& func, + const Op& op, Args&&... args) { + auto partial_results = RunReduceP(num_threads, size, func, op, args...); + + typename Op::result_type result = 0; + + for (auto partial_result : partial_results) { + result = op(result, partial_result); } return result; diff --git a/lib/run_qsim.h b/lib/run_qsim.h index 55ce34a6..db865952 100644 --- a/lib/run_qsim.h +++ b/lib/run_qsim.h @@ -15,9 +15,11 @@ #ifndef RUN_QSIM_H_ #define RUN_QSIM_H_ +#include #include #include +#include "gate.h" #include "gate_appl.h" #include "util.h" @@ -25,9 +27,11 @@ namespace qsim { // Helper struct to run qsim. -template +template struct QSimRunner final { struct Parameter { + uint64_t seed; // Random number gaenerator seed to perform measurements. unsigned num_threads; unsigned verbosity; }; @@ -66,6 +70,8 @@ struct QSimRunner final { t0 = GetTime(); } + RGen rgen(param.seed); + using StateSpace = typename Simulator::StateSpace; StateSpace state_space(circuit.num_qubits, param.num_threads); @@ -89,11 +95,11 @@ struct QSimRunner final { t1 = GetTime(); } - ApplyFusedGate(simulator, fused_gates[i], state); + ApplyGate(state_space, simulator, fused_gates[i], rgen, state); if (param.verbosity > 1) { double t2 = GetTime(); - IO::messagef("gate %lu done in %g seconds\n", i, t2 - t1); + IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1); } unsigned t = times_to_measure_at[cur_time_index]; @@ -133,6 +139,11 @@ struct QSimRunner final { t0 = GetTime(); } + RGen rgen(param.seed); + + using StateSpace = typename Simulator::StateSpace; + StateSpace state_space(circuit.num_qubits, param.num_threads); + Simulator simulator(circuit.num_qubits, param.num_threads); auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, @@ -144,11 +155,11 @@ struct QSimRunner final { t1 = GetTime(); } - ApplyFusedGate(simulator, fused_gates[i], state); + ApplyGate(state_space, simulator, fused_gates[i], rgen, state); if (param.verbosity > 1) { double t2 = GetTime(); - IO::messagef("gate %lu done in %g seconds\n", i, t2 - t1); + IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1); } } @@ -157,6 +168,24 @@ struct QSimRunner final { IO::messagef("time elapsed %g seconds.\n", t2 - t0); } + return true; + } + + private: + template + static bool ApplyGate(const StateSpace& state_space, + const Simulator& simulator, const FGate& fgate, + RGen& rgen, State& state) { + if (fgate.kind == gate::kMeasurement) { + auto result = state_space.Measure(fgate.qubits, rgen, state); + if (!result.valid) { + IO::errorf("measurement failed.\n"); + return false; + } + } else { + ApplyFusedGate(simulator, fgate, state); + } + return true; } }; diff --git a/lib/seqfor.h b/lib/seqfor.h index e8437b84..b0ab4c4b 100644 --- a/lib/seqfor.h +++ b/lib/seqfor.h @@ -16,10 +16,25 @@ #define SEQFOR_H_ #include +#include namespace qsim { struct SequentialFor { + static bool CanBeParallel(uint64_t size) { + return false; + } + + static uint64_t GetIndex0( + uint64_t size, unsigned num_threads, unsigned thread_id) { + return 0; + } + + static uint64_t GetIndex1( + uint64_t size, unsigned num_threads, unsigned thread_id) { + return size; + } + template static void Run( unsigned num_threads, uint64_t size, Function&& func, Args&&... args) { @@ -29,16 +44,23 @@ struct SequentialFor { } template - static typename Op::result_type RunReduce(unsigned num_threads, - uint64_t size, Function&& func, - Op&& op, Args&&... args) { + static std::vector RunReduceP( + unsigned num_threads, uint64_t size, Function&& func, const Op& op, + Args&&... args) { typename Op::result_type result = 0; for (uint64_t i = 0; i < size; ++i) { result = op(result, func(1, 0, i, args...)); } - return result; + return std::vector(1, result); + } + + template + static typename Op::result_type RunReduce(unsigned num_threads, + uint64_t size, Function&& func, + const Op& op, Args&&... args) { + return RunReduceP(num_threads, size, func, op, args...)[0]; } }; diff --git a/lib/statespace.h b/lib/statespace.h index 8cd68b56..18abc659 100644 --- a/lib/statespace.h +++ b/lib/statespace.h @@ -22,6 +22,9 @@ #include #include #include +#include + +#include "util.h" namespace qsim { @@ -38,9 +41,16 @@ class StateSpace { using fp_type = FP; using State = std::unique_ptr; + struct MeasurementResult { + uint64_t mask; + uint64_t bits; + std::vector bitstring; + bool valid; + }; + StateSpace(unsigned num_qubits, unsigned num_threads, uint64_t raw_size) - : size_(uint64_t{1} << num_qubits), raw_size_(raw_size), - num_threads_(num_threads) {} + : num_qubits_(num_qubits), num_threads_(num_threads), + raw_size_(raw_size) {} State CreateState() const { auto vector_size = sizeof(fp_type) * raw_size_; @@ -56,7 +66,7 @@ class StateSpace { #endif } - State CreateState(fp_type* p) const { + static State CreateState(fp_type* p) { return State(p, &detail::do_not_free); } @@ -64,11 +74,11 @@ class StateSpace { return State(nullptr, &free); } - uint64_t Size(const State& state) const { - return size_; + uint64_t Size() const { + return uint64_t{1} << num_qubits_; } - uint64_t RawSize(const State& state) const { + uint64_t RawSize() const { return raw_size_; } @@ -94,9 +104,67 @@ class StateSpace { } protected: - uint64_t size_; - uint64_t raw_size_; + template + double Norm(uint64_t size, Func&& PartialNorms, const State& state) const { + auto partial_norms = PartialNorms(num_threads_, size, state); + + double norm = partial_norms[0]; + for (std::size_t i = 1; i < partial_norms.size(); ++i) { + norm += partial_norms[i]; + } + + return norm; + } + + template + MeasurementResult VirtualMeasure( + const std::vector& qubits, RGen& rgen, const State& state, + uint64_t size, Func1&& PartialNorms, Func2&& FindMeasuredBits) const { + MeasurementResult result; + + result.valid = true; + result.mask = 0; + + for (auto q : qubits) { + if (q >= num_qubits_) { + result.valid = false; + return result; + } + + result.mask |= uint64_t{1} << q; + } + + auto partial_norms = PartialNorms(num_threads_, size, state); + + for (std::size_t i = 1; i < partial_norms.size(); ++i) { + partial_norms[i] += partial_norms[i - 1]; + } + + auto norm = partial_norms.back(); + auto r = RandomValue(rgen, norm); + + unsigned m = 0; + while (r > partial_norms[m]) ++m; + r -= m > 0 ? partial_norms[m - 1] : 0; + + uint64_t k0 = For::GetIndex0(size, num_threads_, m); + uint64_t k1 = For::GetIndex1(size, num_threads_, m); + + result.bits = FindMeasuredBits(k0, k1, r, result.mask, state); + + result.bitstring.reserve(qubits.size()); + result.bitstring.resize(0); + + for (auto q : qubits) { + result.bitstring.push_back((result.bits >> q) & 1); + } + + return result; + } + + unsigned num_qubits_; unsigned num_threads_; + uint64_t raw_size_; }; } // namespace qsim diff --git a/lib/statespace_avx.h b/lib/statespace_avx.h index c19fd049..2c71ff3a 100644 --- a/lib/statespace_avx.h +++ b/lib/statespace_avx.h @@ -28,6 +28,31 @@ namespace qsim { +namespace detail { + +inline __m256i GetZeroMaskAVX(uint64_t i, uint64_t mask, uint64_t bits) { + __m256i s1 = _mm256_setr_epi64x(i + 0, i + 2, i + 4, i + 6); + __m256i s2 = _mm256_setr_epi64x(i + 1, i + 3, i + 5, i + 7); + __m256i ma = _mm256_set1_epi64x(mask); + __m256i bi = _mm256_set1_epi64x(bits); + + s1 = _mm256_and_si256(s1, ma); + s2 = _mm256_and_si256(s2, ma); + + s1 = _mm256_cmpeq_epi64(s1, bi); + s2 = _mm256_cmpeq_epi64(s2, bi); + + return _mm256_blend_epi32(s1, s2, 170); // 10101010 +} + +inline double HorisontalSumAVX(__m256 s) { + float buf[8]; + _mm256_storeu_ps(buf, s); + return buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7]; +} + +} // namespace detail + // Routines for state-vector manipulations. // State is a vectorized sequence of eight real components followed by eight // imaginary components. Eight single-precison floating numbers can be loaded @@ -40,8 +65,7 @@ struct StateSpaceAVX : public StateSpace { StateSpaceAVX(unsigned num_qubits, unsigned num_threads) : Base(num_qubits, num_threads, - 2 * std::max(uint64_t{8}, uint64_t{1} << num_qubits)), - num_qubits_(num_qubits) {} + 2 * std::max(uint64_t{8}, uint64_t{1} << num_qubits)) {} void InternalToNormalOrder(State& state) const { auto f = [](unsigned n, unsigned m, uint64_t i, State& state) { @@ -61,7 +85,7 @@ struct StateSpaceAVX : public StateSpace { } }; - if (num_qubits_ == 1) { + if (Base::num_qubits_ == 1) { auto s = state.get(); s[2] = s[1]; @@ -71,7 +95,7 @@ struct StateSpaceAVX : public StateSpace { for (uint64_t i = 4; i < 16; ++i) { s[i] = 0; } - } else if (num_qubits_ == 2) { + } else if (Base::num_qubits_ == 2) { auto s = state.get(); s[6] = s[3]; @@ -108,7 +132,7 @@ struct StateSpaceAVX : public StateSpace { } }; - if (num_qubits_ == 1) { + if (Base::num_qubits_ == 1) { auto s = state.get(); s[8] = s[1]; @@ -119,7 +143,7 @@ struct StateSpaceAVX : public StateSpace { s[i] = 0; s[i + 8] = 0; } - } else if (num_qubits_ == 2) { + } else if (Base::num_qubits_ == 2) { auto s = state.get(); s[8] = s[1]; @@ -153,14 +177,12 @@ struct StateSpaceAVX : public StateSpace { // Uniform superposition. void SetStateUniform(State& state) const { - uint64_t size = uint64_t{1} << num_qubits_; - __m256 val0 = _mm256_setzero_ps(); __m256 valu; - fp_type v = double{1} / std::sqrt(size); + fp_type v = double{1} / std::sqrt(Base::Size()); - switch (num_qubits_) { + switch (Base::num_qubits_) { case 1: valu = _mm256_set_ps(0, 0, 0, 0, 0, 0, v, v); break; @@ -206,22 +228,7 @@ struct StateSpaceAVX : public StateSpace { } double Norm(const State& state) const { - using Op = std::plus; - - auto f = [](unsigned n, unsigned m, uint64_t i, - const State& state) -> double { - __m256 re = _mm256_load_ps(state.get() + 16 * i); - __m256 im = _mm256_load_ps(state.get() + 16 * i + 8); - __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); - - float buffer[8]; - _mm256_storeu_ps(buffer, s1); - return buffer[0] + buffer[1] + buffer[2] + buffer[3] - + buffer[4] + buffer[5] + buffer[6] + buffer[7]; - }; - - return For::RunReduce( - Base::num_threads_, Base::raw_size_ / 16, f, Op(), state); + return Base::Norm(Base::raw_size_ / 16, PartialNorms, state); } std::complex InnerProduct( @@ -238,15 +245,8 @@ struct StateSpaceAVX : public StateSpace { __m256 ip_re = _mm256_fmadd_ps(im1, im2, _mm256_mul_ps(re1, re2)); __m256 ip_im = _mm256_fnmadd_ps(im1, re2, _mm256_mul_ps(re1, im2)); - float bre[8]; - float bim[8]; - _mm256_storeu_ps(bre, ip_re); - _mm256_storeu_ps(bim, ip_im); - - double re = bre[0] + bre[1] + bre[2] + bre[3] - + bre[4] + bre[5] + bre[6] + bre[7]; - double im = bim[0] + bim[1] + bim[2] + bim[3] - + bim[4] + bim[5] + bim[6] + bim[7]; + double re = detail::HorisontalSumAVX(ip_re); + double im = detail::HorisontalSumAVX(ip_im); return std::complex{re, im}; }; @@ -267,11 +267,7 @@ struct StateSpaceAVX : public StateSpace { __m256 ip_re = _mm256_fmadd_ps(im1, im2, _mm256_mul_ps(re1, re2)); - float bre[8]; - _mm256_storeu_ps(bre, ip_re); - - return bre[0] + bre[1] + bre[2] + bre[3] - + bre[4] + bre[5] + bre[6] + bre[7]; + return detail::HorisontalSumAVX(ip_re); }; return For::RunReduce( @@ -318,8 +314,102 @@ struct StateSpaceAVX : public StateSpace { return bitstrings; } + using MeasurementResult = typename Base::MeasurementResult; + + template + MeasurementResult Measure(const std::vector& qubits, + RGen& rgen, State& state) const { + auto result = VirtualMeasure(qubits, rgen, state); + + if (result.valid) { + CollapseState(result, state); + } + + return result; + } + + template + MeasurementResult VirtualMeasure(const std::vector& qubits, + RGen& rgen, const State& state) const { + return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 16, + PartialNorms, FindMeasuredBits); + } + + + void CollapseState(MeasurementResult& result, State& state) const { + CollapseState(Base::num_threads_, Base::raw_size_ / 16, result.mask, + result.bits, state); + } + private: - unsigned num_qubits_; + static std::vector PartialNorms( + unsigned num_threads, uint64_t size, const State& state) { + auto f = [](unsigned n, unsigned m, uint64_t i, + const State& state) -> double { + __m256 re = _mm256_load_ps(state.get() + 16 * i); + __m256 im = _mm256_load_ps(state.get() + 16 * i + 8); + __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); + + return detail::HorisontalSumAVX(s1); + }; + + using Op = std::plus; + return For::RunReduceP(num_threads, size, f, Op(), state); + } + + static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, + uint64_t mask, const State& state) { + double csum = 0; + + for (uint64_t k = k0; k < k1; ++k) { + for (uint64_t j = 0; j < 8; ++j) { + auto re = state.get()[16 * k + j]; + auto im = state.get()[16 * k + 8 + j]; + csum += re * re + im * im; + if (r < csum) { + return (8 * k + j) & mask; + } + } + } + + return 0; + } + + static void CollapseState(unsigned num_threads, uint64_t size, + uint64_t mask, uint64_t bits, State& state) { + auto f1 = [](unsigned n, unsigned m, uint64_t i, + const State& state, uint64_t mask, uint64_t bits) -> double { + __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); + + __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); + __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); + __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); + + return detail::HorisontalSumAVX(s1); + }; + + using Op = std::plus; + double norm = For::RunReduce( + num_threads, size, f1, Op(), state, mask, bits); + + __m256 renorm = _mm256_set1_ps(1.0 / std::sqrt(norm)); + + auto f2 = [](unsigned n, unsigned m, uint64_t i, + State& state, uint64_t mask, uint64_t bits, __m256 renorm) { + __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); + + __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); + __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); + + re = _mm256_mul_ps(re, renorm); + im = _mm256_mul_ps(im, renorm); + + _mm256_store_ps(state.get() + 16 * i, re); + _mm256_store_ps(state.get() + 16 * i + 8, im); + }; + + For::Run(num_threads, size, f2, state, mask, bits, renorm); + } }; } // namespace qsim diff --git a/lib/statespace_basic.h b/lib/statespace_basic.h index e11f155c..41c196a9 100644 --- a/lib/statespace_basic.h +++ b/lib/statespace_basic.h @@ -47,12 +47,12 @@ struct StateSpaceBasic : public StateSpace { state.get()[2 * i + 1] = 0; }; - For::Run(Base::num_threads_, Base::size_, f, state); + For::Run(Base::num_threads_, Base::raw_size_ / 2, f, state); } // Uniform superposition. void SetStateUniform(State& state) const { - fp_type val = fp_type{1} / std::sqrt(Base::size_); + fp_type val = fp_type{1} / std::sqrt(Base::Size()); auto f = [](unsigned n, unsigned m, uint64_t i, fp_type val, State& state) { @@ -60,7 +60,7 @@ struct StateSpaceBasic : public StateSpace { state.get()[2 * i + 1] = 0; }; - For::Run(Base::num_threads_, Base::size_, f, val, state); + For::Run(Base::num_threads_, Base::raw_size_ / 2, f, val, state); } // |0> state. @@ -88,15 +88,7 @@ struct StateSpaceBasic : public StateSpace { } double Norm(const State& state) const { - using Op = std::plus; - - auto f = [](unsigned n, unsigned m, uint64_t i, - const State& state) -> double { - auto s = state.get() + 2 * i; - return s[0] * s[0] + s[1] * s[1]; - }; - - return For::RunReduce(Base::num_threads_, Base::size_, f, Op(), state); + return Base::Norm(Base::raw_size_ / 2, PartialNorms, state); } std::complex InnerProduct( @@ -115,7 +107,7 @@ struct StateSpaceBasic : public StateSpace { }; return For::RunReduce( - Base::num_threads_, Base::size_, f, Op(), state1, state2); + Base::num_threads_, Base::raw_size_ / 2, f, Op(), state1, state2); } double RealInnerProduct(const State& state1, const State& state2) const { @@ -130,7 +122,7 @@ struct StateSpaceBasic : public StateSpace { }; return For::RunReduce( - Base::num_threads_, Base::size_, f, Op(), state1, state2); + Base::num_threads_, Base::raw_size_ / 2, f, Op(), state1, state2); } template @@ -140,11 +132,12 @@ struct StateSpaceBasic : public StateSpace { if (num_samples > 0) { double norm = 0; - uint64_t size = 2 * Base::size_; - const fp_type* v = state.get(); + uint64_t size = Base::raw_size_ / 2; - for (uint64_t k = 0; k < size; k += 2) { - norm += v[k] * v[k] + v[k + 1] * v[k + 1]; + for (uint64_t k = 0; k < size; ++k) { + auto re = state.get()[2 * k]; + auto im = state.get()[2 * k + 1]; + norm += re * re + im * im; } auto rs = GenerateRandomValues(num_samples, seed, norm); @@ -153,10 +146,12 @@ struct StateSpaceBasic : public StateSpace { double csum = 0; bitstrings.reserve(num_samples); - for (uint64_t k = 0; k < size; k += 2) { - csum += v[k] * v[k] + v[k + 1] * v[k + 1]; + for (uint64_t k = 0; k < size; ++k) { + auto re = state.get()[2 * k]; + auto im = state.get()[2 * k + 1]; + csum += re * re + im * im; while (rs[m] < csum && m < num_samples) { - bitstrings.emplace_back(k / 2); + bitstrings.emplace_back(k); ++m; } } @@ -164,6 +159,88 @@ struct StateSpaceBasic : public StateSpace { return bitstrings; } + + using MeasurementResult = typename Base::MeasurementResult; + + template + MeasurementResult Measure(const std::vector& qubits, + RGen& rgen, State& state) const { + auto result = VirtualMeasure(qubits, rgen, state); + + if (result.valid) { + CollapseState(result, state); + } + + return result; + } + + template + MeasurementResult VirtualMeasure(const std::vector& qubits, + RGen& rgen, const State& state) const { + return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 2, + PartialNorms, FindMeasuredBits); + } + + + void CollapseState(MeasurementResult& result, State& state) const { + CollapseState(Base::num_threads_, Base::raw_size_ / 2, result.mask, + result.bits, state); + } + + private: + static std::vector PartialNorms( + unsigned num_threads, uint64_t size, const State& state) { + auto f = [](unsigned n, unsigned m, uint64_t i, + const State& state) -> double { + auto s = state.get() + 2 * i; + return s[0] * s[0] + s[1] * s[1]; + }; + + using Op = std::plus; + return For::RunReduceP(num_threads, size, f, Op(), state); + } + + static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, + uint64_t mask, const State& state) { + double csum = 0; + + for (uint64_t k = k0; k < k1; ++k) { + auto re = state.get()[2 * k]; + auto im = state.get()[2 * k + 1]; + csum += re * re + im * im; + if (r < csum) { + return k & mask; + } + } + + return 0; + } + + static void CollapseState(unsigned num_threads, uint64_t size, + uint64_t mask, uint64_t bits, State& state) { + auto f1 = [](unsigned n, unsigned m, uint64_t i, const State& state, + uint64_t mask, uint64_t bits) -> double { + auto s = state.get() + 2 * i; + return (i & mask) == bits ? s[0] * s[0] + s[1] * s[1] : 0; + }; + + using Op = std::plus; + double norm = For::RunReduce(num_threads, size, f1, Op(), + state, mask, bits); + + double renorm = 1.0 / std::sqrt(norm); + + auto f2 = [](unsigned n, unsigned m, uint64_t i, State& state, + uint64_t mask, uint64_t bits, fp_type renorm) { + auto s = state.get() + 2 * i; + bool not_zero = (i & mask) == bits; + + s[0] = not_zero ? s[0] * renorm : 0; + s[1] = not_zero ? s[1] * renorm : 0; + }; + + For::Run(num_threads, size, f2, state, mask, bits, renorm); + } }; } // namespace qsim diff --git a/lib/statespace_sse.h b/lib/statespace_sse.h index f7b5bceb..e6e25eb2 100644 --- a/lib/statespace_sse.h +++ b/lib/statespace_sse.h @@ -28,6 +28,31 @@ namespace qsim { +namespace detail { + +inline __m128i GetZeroMaskSSE(uint64_t i, uint64_t mask, uint64_t bits) { + __m128i s1 = _mm_set_epi64x(i + 2, i + 0); + __m128i s2 = _mm_set_epi64x(i + 3, i + 1); + __m128i ma = _mm_set1_epi64x(mask); + __m128i bi = _mm_set1_epi64x(bits); + + s1 = _mm_and_si128(s1, ma); + s2 = _mm_and_si128(s2, ma); + + s1 = _mm_cmpeq_epi64(s1, bi); + s2 = _mm_cmpeq_epi64(s2, bi); + + return _mm_blend_epi16(s1, s2, 204); // 11001100 +} + +inline double HorisontalSumSSE(__m128 s) { + float buf[4]; + _mm_storeu_ps(buf, s); + return buf[0] + buf[1] + buf[2] + buf[3]; +} + +} // namespace detail + // Routines for state-vector manipulations. // State is a vectorized sequence of four real components followed by four // imaginary components. Four single-precison floating numbers can be loaded @@ -40,8 +65,7 @@ struct StateSpaceSSE : public StateSpace { StateSpaceSSE(unsigned num_qubits, unsigned num_threads) : Base(num_qubits, num_threads, - 2 * std::max(uint64_t{4}, uint64_t{1} << num_qubits)), - num_qubits_(num_qubits) {} + 2 * std::max(uint64_t{4}, uint64_t{1} << num_qubits)) {} void InternalToNormalOrder(State& state) const { auto f = [](unsigned n, unsigned m, uint64_t i, State& state) { @@ -61,7 +85,7 @@ struct StateSpaceSSE : public StateSpace { } }; - if (num_qubits_ == 1) { + if (Base::num_qubits_ == 1) { auto s = state.get(); s[2] = s[1]; @@ -94,7 +118,7 @@ struct StateSpaceSSE : public StateSpace { } }; - if (num_qubits_ == 1) { + if (Base::num_qubits_ == 1) { auto s = state.get(); s[4] = s[1]; @@ -124,14 +148,12 @@ struct StateSpaceSSE : public StateSpace { // Uniform superposition. void SetStateUniform(State& state) const { - uint64_t size = uint64_t{1} << num_qubits_; - __m128 val0 = _mm_setzero_ps(); __m128 valu; - fp_type v = double{1} / std::sqrt(size); + fp_type v = double{1} / std::sqrt(Base::Size()); - if (num_qubits_ == 1) { + if (Base::num_qubits_ == 1) { valu = _mm_set_ps(0, 0, v, v); } else { valu = _mm_set1_ps(v); @@ -171,21 +193,7 @@ struct StateSpaceSSE : public StateSpace { } double Norm(const State& state) const { - using Op = std::plus; - - auto f = [](unsigned n, unsigned m, uint64_t i, - const State& state) -> double { - __m128 re = _mm_load_ps(state.get() + 8 * i); - __m128 im = _mm_load_ps(state.get() + 8 * i + 4); - __m128 s1 = _mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im)); - - float buffer[4]; - _mm_storeu_ps(buffer, s1); - return buffer[0] + buffer[1] + buffer[2] + buffer[3]; - }; - - return For::RunReduce( - Base::num_threads_, Base::raw_size_ / 8, f, Op(), state); + return Base::Norm(Base::raw_size_ / 8, PartialNorms, state); } std::complex InnerProduct( @@ -202,13 +210,8 @@ struct StateSpaceSSE : public StateSpace { __m128 ip_re = _mm_add_ps(_mm_mul_ps(re1, re2), _mm_mul_ps(im1, im2)); __m128 ip_im = _mm_sub_ps(_mm_mul_ps(re1, im2), _mm_mul_ps(im1, re2)); - float bre[4]; - float bim[4]; - _mm_storeu_ps(bre, ip_re); - _mm_storeu_ps(bim, ip_im); - - double re = bre[0] + bre[1] + bre[2] + bre[3]; - double im = bim[0] + bim[1] + bim[2] + bim[3]; + double re = detail::HorisontalSumSSE(ip_re); + double im = detail::HorisontalSumSSE(ip_im); return std::complex{re, im}; }; @@ -229,10 +232,7 @@ struct StateSpaceSSE : public StateSpace { __m128 ip_re = _mm_add_ps(_mm_mul_ps(re1, re2), _mm_mul_ps(im1, im2)); - float bre[4]; - _mm_storeu_ps(bre, ip_re); - - return bre[0] + bre[1] + bre[2] + bre[3]; + return detail::HorisontalSumSSE(ip_re); }; return For::RunReduce( @@ -279,8 +279,106 @@ struct StateSpaceSSE : public StateSpace { return bitstrings; } + using MeasurementResult = typename Base::MeasurementResult; + + template + MeasurementResult Measure(const std::vector& qubits, + RGen& rgen, State& state) const { + auto result = VirtualMeasure(qubits, rgen, state); + + if (result.valid) { + CollapseState(result, state); + } + + return result; + } + + template + MeasurementResult VirtualMeasure(const std::vector& qubits, + RGen& rgen, const State& state) const { + return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 8, + PartialNorms, FindMeasuredBits); + } + + + void CollapseState(MeasurementResult& result, State& state) const { + CollapseState(Base::num_threads_, Base::raw_size_ / 8, result.mask, + result.bits, state); + } + private: - unsigned num_qubits_; + static std::vector PartialNorms( + unsigned num_threads, uint64_t size, const State& state) { + auto f = [](unsigned n, unsigned m, uint64_t i, + const State& state) -> double { + __m128 re = _mm_load_ps(state.get() + 8 * i); + __m128 im = _mm_load_ps(state.get() + 8 * i + 4); + __m128 s1 = _mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im)); + + return detail::HorisontalSumSSE(s1); + }; + + using Op = std::plus; + return For::RunReduceP(num_threads, size, f, Op(), state); + } + + static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, + uint64_t mask, const State& state) { + double csum = 0; + + for (uint64_t k = k0; k < k1; ++k) { + for (uint64_t j = 0; j < 4; ++j) { + auto re = state.get()[8 * k + j]; + auto im = state.get()[8 * k + 4 + j]; + csum += re * re + im * im; + if (r < csum) { + return (4 * k + j) & mask; + } + } + } + + return 0; + } + + static void CollapseState(unsigned num_threads, uint64_t size, + uint64_t mask, uint64_t bits, State& state) { + __m128 zero = _mm_set1_ps(0); + + auto f1 = [](unsigned n, unsigned m, uint64_t i, const State& state, + uint64_t mask, uint64_t bits, __m128 zero) -> double { + __m128 ml = _mm_castsi128_ps(detail::GetZeroMaskSSE(4 * i, mask, bits)); + + __m128 re = _mm_load_ps(state.get() + 8 * i); + __m128 im = _mm_load_ps(state.get() + 8 * i + 4); + __m128 s1 = _mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im)); + + s1 = _mm_blendv_ps(zero, s1, ml); + + return detail::HorisontalSumSSE(s1); + }; + + using Op = std::plus; + double norm = For::RunReduce(num_threads, size, f1, Op(), + state, mask, bits, zero); + + __m128 renorm = _mm_set1_ps(1.0 / std::sqrt(norm)); + + auto f2 = [](unsigned n, unsigned m, uint64_t i, State& state, + uint64_t mask, uint64_t bits, __m128 renorm, __m128 zero) { + __m128 ml = _mm_castsi128_ps(detail::GetZeroMaskSSE(4 * i, mask, bits)); + + __m128 re = _mm_load_ps(state.get() + 8 * i); + __m128 im = _mm_load_ps(state.get() + 8 * i + 4); + + re = _mm_blendv_ps(zero, _mm_mul_ps(re, renorm), ml); + im = _mm_blendv_ps(zero, _mm_mul_ps(im, renorm), ml); + + _mm_store_ps(state.get() + 8 * i, re); + _mm_store_ps(state.get() + 8 * i + 4, im); + }; + + For::Run(num_threads, size, f2, state, mask, bits, renorm, zero); + } }; } // namespace qsim diff --git a/lib/util.h b/lib/util.h index 2be72ebd..726a0192 100644 --- a/lib/util.h +++ b/lib/util.h @@ -58,14 +58,20 @@ inline double GetTime() { / steady_clock::period::den; } +template +inline DistrRealType RandomValue(RGen& rgen, DistrRealType max_value) { + std::uniform_real_distribution distr(0.0, max_value); + return distr(rgen); +} + template inline std::vector GenerateRandomValues( - uint64_t num_samples, unsigned seed, double norm) { + uint64_t num_samples, unsigned seed, DistrRealType max_value) { std::vector rs; rs.reserve(num_samples + 1); std::mt19937 rgen(seed); - std::uniform_real_distribution distr(0.0, norm); + std::uniform_real_distribution distr(0.0, max_value); for (uint64_t i = 0; i < num_samples; ++i) { rs.emplace_back(distr(rgen)); @@ -73,7 +79,7 @@ inline std::vector GenerateRandomValues( std::sort(rs.begin(), rs.end()); // Populate the final element to prevent sanitizer errors. - rs.emplace_back(norm); + rs.emplace_back(max_value); return rs; } diff --git a/pybind_interface/pybind_main.h b/pybind_interface/pybind_main.h index b640d4ab..8c2ea19a 100644 --- a/pybind_interface/pybind_main.h +++ b/pybind_interface/pybind_main.h @@ -100,7 +100,6 @@ PYBIND11_MODULE(qsim, m) { .value("kFSimGate", GateKind::kFSimGate) .value("kMatrixGate1", GateKind::kMatrixGate1) .value("kMatrixGate2", GateKind::kMatrixGate2) - .value("kGateDecomp", GateKind::kGateDecomp) .export_values(); m.def("add_gate", &add_gate, "Adds a gate to the given circuit."); diff --git a/tests/circuit_qsim_parser_test.cc b/tests/circuit_qsim_parser_test.cc index 60a44ca2..dd037326 100644 --- a/tests/circuit_qsim_parser_test.cc +++ b/tests/circuit_qsim_parser_test.cc @@ -49,22 +49,23 @@ R"(2 7 id2 0 1 8 cz 0 1 9 is 0 1 -10 fs 0 1 0.2 0.6 -11 cp 0 1 0.5 +10 m 0 1 +11 fs 0 1 0.2 0.6 +12 cp 0 1 0.5 +13 m 0 +14 m 1 )"; Circuit> circuit; std::stringstream ss1(valid_circuit); - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss1, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss1, circuit)); EXPECT_EQ(circuit.num_qubits, 2); - EXPECT_EQ(circuit.gates.size(), 18); + EXPECT_EQ(circuit.gates.size(), 21); std::stringstream ss2(valid_circuit); - EXPECT_EQ( - CircuitQsimParser::FromStream(4, provider, ss2, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(4, provider, ss2, circuit)); EXPECT_EQ(circuit.num_qubits, 2); EXPECT_EQ(circuit.gates.size(), 10); } @@ -79,11 +80,10 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } -TEST(CircuitQsimParserTest, TrailingSpace) { +TEST(CircuitQsimParserTest, TrailingSpace1) { constexpr char invalid_circuit[] = R"(2 0 h 0 @@ -93,8 +93,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, TrailingCharacters) { @@ -107,11 +106,10 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } -TEST(CircuitQsimParserTest, InvalidQubitRange) { +TEST(CircuitQsimParserTest, InvalidQubitRange1) { constexpr char invalid_circuit[] = R"(2 0 h 0 @@ -121,11 +119,10 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } -TEST(CircuitQsimParserTest, QubitIsNotNumber) { +TEST(CircuitQsimParserTest, QubitIsNotNumber1) { constexpr char invalid_circuit[] = R"(2 0 h 0 @@ -135,8 +132,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, SameQubits) { @@ -149,8 +145,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidSingleQubitGate) { @@ -163,8 +158,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidTwoQubitGate) { @@ -177,8 +171,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidRxGate) { @@ -192,8 +185,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidRyGate) { @@ -207,8 +199,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidRzGate) { @@ -222,8 +213,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidRxyGate) { @@ -237,8 +227,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidFsimGate) { @@ -251,8 +240,7 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } TEST(CircuitQsimParserTest, InvalidCpGate) { @@ -265,8 +253,46 @@ R"(2 std::stringstream ss(invalid_circuit); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(99, provider, ss, circuit), false); + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); +} + +TEST(CircuitQsimParserTest, InvalidQubitRange2) { + constexpr char invalid_circuit[] = +R"(2 +0 h 0 +0 h 1 +1 m 0 2)"; + + std::stringstream ss(invalid_circuit); + Circuit> circuit; + + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); +} + +TEST(CircuitQsimParserTest, QubitIsNotNumber2) { + constexpr char invalid_circuit[] = +R"(2 +0 h 0 +0 h 1 +1 m 0 i)"; + + std::stringstream ss(invalid_circuit); + Circuit> circuit; + + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); +} + +TEST(CircuitQsimParserTest, TrailingSpace2) { + constexpr char invalid_circuit[] = +R"(2 +0 h 0 +0 h 1 +1 m 0 )"; + + std::stringstream ss(invalid_circuit); + Circuit> circuit; + + EXPECT_FALSE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); } } // namespace qsim diff --git a/tests/fuser_basic_test.cc b/tests/fuser_basic_test.cc index 51e90b0b..f2732e39 100644 --- a/tests/fuser_basic_test.cc +++ b/tests/fuser_basic_test.cc @@ -61,7 +61,7 @@ TEST(FuserBasicTest, NoTimesToSplitAt) { std::stringstream ss(circuit_string1); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 4); EXPECT_EQ(circuit.gates.size(), 27); @@ -226,7 +226,7 @@ TEST(FuserBasicTest, TimesToSplitAt1) { std::stringstream ss(circuit_string1); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 4); EXPECT_EQ(circuit.gates.size(), 27); @@ -401,7 +401,7 @@ TEST(FuserBasicTest, TimesToSplitAt2) { std::stringstream ss(circuit_string1); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 4); EXPECT_EQ(circuit.gates.size(), 27); @@ -581,7 +581,7 @@ TEST(FuserBasicTest, OrphanedQubits1) { std::stringstream ss(circuit_string2); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 3); EXPECT_EQ(circuit.gates.size(), 9); @@ -637,7 +637,7 @@ TEST(FuserBasicTest, OrphanedQubits2) { std::stringstream ss(circuit_string2); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 3); EXPECT_EQ(circuit.gates.size(), 9); @@ -718,7 +718,7 @@ TEST(FuserBasicTest, UnfusibleSingleQubitGate) { std::stringstream ss(circuit_string2); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(99, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); EXPECT_EQ(circuit.num_qubits, 3); EXPECT_EQ(circuit.gates.size(), 9); @@ -778,6 +778,168 @@ TEST(FuserBasicTest, UnfusibleSingleQubitGate) { EXPECT_EQ(fused_gates[2].gates[3]->qubits[0], 1); } +constexpr char circuit_string3[] = +R"(4 +0 h 0 +0 h 1 +0 h 2 +0 h 3 +1 cz 0 1 +1 m 2 +1 m 3 +2 x 0 +2 y 1 +2 is 2 3 +3 cz 0 1 +3 m 2 3 +4 x 0 +4 y 1 +4 is 2 3 +5 m 2 3 +5 m 0 1 +)"; + +TEST(FuserBasicTest, MeasurementGate) { + std::stringstream ss(circuit_string3); + Circuit> circuit; + + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); + EXPECT_EQ(circuit.num_qubits, 4); + EXPECT_EQ(circuit.gates.size(), 17); + + using Fuser = BasicGateFuser>; + auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, 6); + + EXPECT_EQ(fused_gates.size(), 11); + + EXPECT_EQ(fused_gates[0].kind, kGateCZ); + EXPECT_EQ(fused_gates[0].time, 1); + EXPECT_EQ(fused_gates[0].num_qubits, 2); + EXPECT_EQ(fused_gates[0].qubits[0], 0); + EXPECT_EQ(fused_gates[0].qubits[1], 1); + EXPECT_EQ(fused_gates[0].gates.size(), 3); + EXPECT_EQ(fused_gates[0].gates[0]->kind, kGateHd); + EXPECT_EQ(fused_gates[0].gates[0]->time, 0); + EXPECT_EQ(fused_gates[0].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[0].gates[0]->qubits[0], 0); + EXPECT_EQ(fused_gates[0].gates[1]->kind, kGateHd); + EXPECT_EQ(fused_gates[0].gates[1]->time, 0); + EXPECT_EQ(fused_gates[0].gates[1]->num_qubits, 1); + EXPECT_EQ(fused_gates[0].gates[1]->qubits[0], 1); + EXPECT_EQ(fused_gates[0].gates[2]->kind, kGateCZ); + EXPECT_EQ(fused_gates[0].gates[2]->time, 1); + EXPECT_EQ(fused_gates[0].gates[2]->num_qubits, 2); + EXPECT_EQ(fused_gates[0].gates[2]->qubits[0], 0); + EXPECT_EQ(fused_gates[0].gates[2]->qubits[1], 1); + + EXPECT_EQ(fused_gates[1].kind, kGateHd); + EXPECT_EQ(fused_gates[1].time, 0); + EXPECT_EQ(fused_gates[1].num_qubits, 1); + EXPECT_EQ(fused_gates[1].qubits[0], 2); + EXPECT_EQ(fused_gates[1].gates.size(), 1); + EXPECT_EQ(fused_gates[1].gates[0]->kind, kGateHd); + EXPECT_EQ(fused_gates[1].gates[0]->time, 0); + EXPECT_EQ(fused_gates[1].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[1].gates[0]->qubits[0], 2); + + EXPECT_EQ(fused_gates[2].kind, kGateHd); + EXPECT_EQ(fused_gates[2].time, 0); + EXPECT_EQ(fused_gates[2].num_qubits, 1); + EXPECT_EQ(fused_gates[2].qubits[0], 3); + EXPECT_EQ(fused_gates[2].gates.size(), 1); + EXPECT_EQ(fused_gates[2].gates[0]->kind, kGateHd); + EXPECT_EQ(fused_gates[2].gates[0]->time, 0); + EXPECT_EQ(fused_gates[2].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[2].gates[0]->qubits[0], 3); + + EXPECT_EQ(fused_gates[3].kind, kMeasurement); + EXPECT_EQ(fused_gates[3].time, 1); + EXPECT_EQ(fused_gates[3].num_qubits, 2); + EXPECT_EQ(fused_gates[3].qubits[0], 2); + EXPECT_EQ(fused_gates[3].qubits[1], 3); + EXPECT_EQ(fused_gates[3].gates.size(), 0); + + EXPECT_EQ(fused_gates[4].kind, kGateIS); + EXPECT_EQ(fused_gates[4].time, 2); + EXPECT_EQ(fused_gates[4].num_qubits, 2); + EXPECT_EQ(fused_gates[4].qubits[0], 2); + EXPECT_EQ(fused_gates[4].qubits[1], 3); + EXPECT_EQ(fused_gates[4].gates.size(), 1); + EXPECT_EQ(fused_gates[4].gates[0]->kind, kGateIS); + EXPECT_EQ(fused_gates[4].gates[0]->time, 2); + EXPECT_EQ(fused_gates[4].gates[0]->num_qubits, 2); + EXPECT_EQ(fused_gates[4].gates[0]->qubits[0], 2); + EXPECT_EQ(fused_gates[4].gates[0]->qubits[1], 3); + + EXPECT_EQ(fused_gates[5].kind, kGateCZ); + EXPECT_EQ(fused_gates[5].time, 3); + EXPECT_EQ(fused_gates[5].num_qubits, 2); + EXPECT_EQ(fused_gates[5].qubits[0], 0); + EXPECT_EQ(fused_gates[5].qubits[1], 1); + EXPECT_EQ(fused_gates[5].gates.size(), 3); + EXPECT_EQ(fused_gates[5].gates[0]->kind, kGateX); + EXPECT_EQ(fused_gates[5].gates[0]->time, 2); + EXPECT_EQ(fused_gates[5].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[5].gates[0]->qubits[0], 0); + EXPECT_EQ(fused_gates[5].gates[1]->kind, kGateY); + EXPECT_EQ(fused_gates[5].gates[1]->time, 2); + EXPECT_EQ(fused_gates[5].gates[1]->num_qubits, 1); + EXPECT_EQ(fused_gates[5].gates[1]->qubits[0], 1); + EXPECT_EQ(fused_gates[5].gates[2]->kind, kGateCZ); + EXPECT_EQ(fused_gates[5].gates[2]->time, 3); + EXPECT_EQ(fused_gates[5].gates[2]->num_qubits, 2); + EXPECT_EQ(fused_gates[5].gates[2]->qubits[0], 0); + EXPECT_EQ(fused_gates[5].gates[2]->qubits[1], 1); + + EXPECT_EQ(fused_gates[6].kind, kMeasurement); + EXPECT_EQ(fused_gates[6].time, 3); + EXPECT_EQ(fused_gates[6].num_qubits, 2); + EXPECT_EQ(fused_gates[6].qubits[0], 2); + EXPECT_EQ(fused_gates[6].qubits[1], 3); + EXPECT_EQ(fused_gates[6].gates.size(), 0); + + EXPECT_EQ(fused_gates[7].kind, kGateIS); + EXPECT_EQ(fused_gates[7].time, 4); + EXPECT_EQ(fused_gates[7].num_qubits, 2); + EXPECT_EQ(fused_gates[7].qubits[0], 2); + EXPECT_EQ(fused_gates[7].qubits[1], 3); + EXPECT_EQ(fused_gates[7].gates.size(), 1); + EXPECT_EQ(fused_gates[7].gates[0]->kind, kGateIS); + EXPECT_EQ(fused_gates[7].gates[0]->time, 4); + EXPECT_EQ(fused_gates[7].gates[0]->num_qubits, 2); + EXPECT_EQ(fused_gates[7].gates[0]->qubits[0], 2); + EXPECT_EQ(fused_gates[7].gates[0]->qubits[1], 3); + + EXPECT_EQ(fused_gates[8].kind, kGateX); + EXPECT_EQ(fused_gates[8].time, 4); + EXPECT_EQ(fused_gates[8].num_qubits, 1); + EXPECT_EQ(fused_gates[8].qubits[0], 0); + EXPECT_EQ(fused_gates[8].gates.size(), 1); + EXPECT_EQ(fused_gates[8].gates[0]->kind, kGateX); + EXPECT_EQ(fused_gates[8].gates[0]->time, 4); + EXPECT_EQ(fused_gates[8].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[8].gates[0]->qubits[0], 0); + + EXPECT_EQ(fused_gates[9].kind, kGateY); + EXPECT_EQ(fused_gates[9].time, 4); + EXPECT_EQ(fused_gates[9].num_qubits, 1); + EXPECT_EQ(fused_gates[9].qubits[0], 1); + EXPECT_EQ(fused_gates[9].gates.size(), 1); + EXPECT_EQ(fused_gates[9].gates[0]->kind, kGateY); + EXPECT_EQ(fused_gates[9].gates[0]->time, 4); + EXPECT_EQ(fused_gates[9].gates[0]->num_qubits, 1); + EXPECT_EQ(fused_gates[9].gates[0]->qubits[0], 1); + + EXPECT_EQ(fused_gates[10].kind, kMeasurement); + EXPECT_EQ(fused_gates[10].time, 5); + EXPECT_EQ(fused_gates[10].num_qubits, 4); + EXPECT_EQ(fused_gates[10].qubits[0], 2); + EXPECT_EQ(fused_gates[10].qubits[1], 3); + EXPECT_EQ(fused_gates[10].qubits[2], 0); + EXPECT_EQ(fused_gates[10].qubits[3], 1); + EXPECT_EQ(fused_gates[10].gates.size(), 0); +} + } // namespace qsim int main(int argc, char** argv) { diff --git a/tests/run_qsim_test.cc b/tests/run_qsim_test.cc index 49c56b57..8b4ba6f7 100644 --- a/tests/run_qsim_test.cc +++ b/tests/run_qsim_test.cc @@ -85,7 +85,7 @@ TEST(RunQSimTest, QSimRunner1) { entropy = 0; - for (uint64_t i = 0; i < state_space.Size(state); ++i) { + for (uint64_t i = 0; i < state_space.Size(); ++i) { auto ampl = state_space.GetAmpl(state, i); float p = std::norm(ampl); entropy -= p * std::log(p); @@ -93,6 +93,7 @@ TEST(RunQSimTest, QSimRunner1) { }; Runner::Parameter param; + param.seed = 1; param.num_threads = 1; param.verbosity = 0; @@ -122,6 +123,7 @@ TEST(RunQSimTest, QSimRunner2) { state_space.SetStateZero(state); Runner::Parameter param; + param.seed = 1; param.num_threads = 1; param.verbosity = 0; @@ -131,7 +133,7 @@ TEST(RunQSimTest, QSimRunner2) { float entropy = 0; - for (uint64_t i = 0; i < state_space.Size(state); ++i) { + for (uint64_t i = 0; i < state_space.Size(); ++i) { auto ampl = state_space.GetAmpl(state, i); float p = std::norm(ampl); entropy -= p * std::log(p); @@ -154,17 +156,18 @@ TEST(RunQSimTest, CirqGates) { State state = state_space.CreateState(); EXPECT_FALSE(state_space.IsNull(state)); - EXPECT_EQ(state_space.Size(state), expected_results.size()); + EXPECT_EQ(state_space.Size(), expected_results.size()); state_space.SetStateZero(state); Runner::Parameter param; + param.seed = 1; param.num_threads = 1; param.verbosity = 0; EXPECT_TRUE(Runner::Run(param, 99, circuit, state)); - for (uint64_t i = 0; i < state_space.Size(state); ++i) { + for (uint64_t i = 0; i < state_space.Size(); ++i) { auto ampl = state_space.GetAmpl(state, i); EXPECT_NEAR(std::real(ampl), std::real(expected_results[i]), 2e-6); EXPECT_NEAR(std::imag(ampl), std::imag(expected_results[i]), 2e-6); diff --git a/tests/statespace_avx_test.cc b/tests/statespace_avx_test.cc index d9e3c2e1..db536600 100644 --- a/tests/statespace_avx_test.cc +++ b/tests/statespace_avx_test.cc @@ -46,6 +46,14 @@ TEST(StateSpaceAVXTest, Ordering) { TestOrdering>(); } +TEST(StateSpaceAVXTest, MeasurementSmall) { + TestMeasurementSmall, For>(); +} + +TEST(StateSpaceAVXTest, MeasurementLarge) { + TestMeasurementLarge>(); +} + } // namespace qsim int main(int argc, char** argv) { diff --git a/tests/statespace_basic_test.cc b/tests/statespace_basic_test.cc index a4be90a1..c1723fe9 100644 --- a/tests/statespace_basic_test.cc +++ b/tests/statespace_basic_test.cc @@ -46,6 +46,14 @@ TEST(StateSpaceBasicTest, Ordering) { TestOrdering>(); } +TEST(StateSpaceBasicTest, MeasurementSmall) { + TestMeasurementSmall, For>(); +} + +TEST(StateSpaceBasicTest, MeasurementLarge) { + TestMeasurementLarge>(); +} + } // namespace qsim int main(int argc, char** argv) { diff --git a/tests/statespace_sse_test.cc b/tests/statespace_sse_test.cc index 871b51d4..c980e467 100644 --- a/tests/statespace_sse_test.cc +++ b/tests/statespace_sse_test.cc @@ -46,6 +46,14 @@ TEST(StateSpaceSSETest, Ordering) { TestOrdering>(); } +TEST(StateSpaceSSETest, MeasurementSmall) { + TestMeasurementSmall, For>(); +} + +TEST(StateSpaceSSETest, MeasurementLarge) { + TestMeasurementLarge>(); +} + } // namespace qsim int main(int argc, char** argv) { diff --git a/tests/statespace_testfixture.h b/tests/statespace_testfixture.h index 743748bf..dc9d6018 100644 --- a/tests/statespace_testfixture.h +++ b/tests/statespace_testfixture.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -394,10 +395,10 @@ void TestNormAndInnerProductSmall() { State state1 = state_space.CreateState(); State state2 = state_space.CreateState(); - EXPECT_EQ(state_space.IsNull(state1), false); - EXPECT_EQ(state_space.IsNull(state2), false); - EXPECT_EQ(state_space.Size(state1), size); - EXPECT_EQ(state_space.Size(state2), size); + EXPECT_FALSE(state_space.IsNull(state1)); + EXPECT_FALSE(state_space.IsNull(state2)); + EXPECT_EQ(state_space.Size(), size); + EXPECT_EQ(state_space.Size(), size); state_space.SetAllZeros(state1); state_space.SetAllZeros(state2); @@ -438,7 +439,7 @@ void TestNormAndInnerProduct() { std::stringstream ss(circuit_string); Circuit> circuit; - EXPECT_EQ(CircuitQsimParser::FromStream(depth, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(depth, provider, ss, circuit)); circuit.gates.emplace_back(GateT::Create(depth + 1, 0)); using StateSpace = typename Simulator::StateSpace; @@ -448,7 +449,7 @@ void TestNormAndInnerProduct() { StateSpace state_space(circuit.num_qubits, 1); State state0 = state_space.CreateState(); - EXPECT_EQ(state_space.IsNull(state0), false); + EXPECT_FALSE(state_space.IsNull(state0)); auto measure = [&state0](unsigned k, const StateSpace& state_space, const State& state) { @@ -467,11 +468,12 @@ void TestNormAndInnerProduct() { }; typename Runner::Parameter param; + param.seed = 1; param.num_threads = 1; param.verbosity = 0; std::vector times{depth, depth + 1}; - EXPECT_EQ(Runner::Run(param, times, circuit, measure), true); + EXPECT_TRUE(Runner::Run(param, times, circuit, measure)); } template @@ -485,8 +487,8 @@ void TestSamplingSmall() { StateSpace state_space(num_qubits, 1); State state = state_space.CreateState(); - EXPECT_EQ(state_space.IsNull(state), false); - EXPECT_EQ(state_space.Size(state), size); + EXPECT_FALSE(state_space.IsNull(state)); + EXPECT_EQ(state_space.Size(), size); std::array ps = {0.1, 0.2, 0.13, 0.12, 0.18, 0.15, 0.07, 0.05}; @@ -517,8 +519,7 @@ void TestSamplingCrossEntropyDifference() { std::stringstream ss(circuit_string); Circuit> circuit; - EXPECT_EQ( - CircuitQsimParser::FromStream(depth, provider, ss, circuit), true); + EXPECT_TRUE(CircuitQsimParser::FromStream(depth, provider, ss, circuit)); using StateSpace = typename Simulator::StateSpace; using State = typename StateSpace::State; @@ -527,15 +528,16 @@ void TestSamplingCrossEntropyDifference() { StateSpace state_space(circuit.num_qubits, 1); State state = state_space.CreateState(); - EXPECT_EQ(state_space.IsNull(state), false); + EXPECT_FALSE(state_space.IsNull(state)); state_space.SetStateZero(state); typename Runner::Parameter param; + param.seed = 1; param.num_threads = 1; param.verbosity = 0; - EXPECT_EQ(Runner::Run(param, depth, circuit, state), true); + EXPECT_TRUE(Runner::Run(param, depth, circuit, state)); auto bitstrings = state_space.Sample(state, num_samples, 1); EXPECT_EQ(bitstrings.size(), num_samples); @@ -586,6 +588,169 @@ void TestOrdering() { } } +template +void MeasureSmall(unsigned num_measurements, unsigned num_threads, + unsigned num_qubits, + const std::vector& qubits_to_measure, + const std::vector& ps, RGen& rgen) { + uint64_t size = uint64_t{1} << num_qubits; + + using State = typename StateSpace::State; + + StateSpace state_space(num_qubits, num_threads); + State state = state_space.CreateState(); + + EXPECT_FALSE(state_space.IsNull(state)); + EXPECT_EQ(state_space.Size(), size); + + state_space.SetStateZero(state); + + std::vector> ampls; + ampls.reserve(size); + + std::vector bins(size, 0); + + for (uint64_t i = 0; i < size; ++i) { + float r = std::sqrt(ps[i]); + float re = r * std::cos(i); + float im = r * std::sin(i); + ampls.emplace_back(std::complex{re, im}); + } + + std::vector measured_bits; + + for (unsigned m = 0; m < num_measurements; ++m) { + for (uint64_t i = 0; i < size; ++i) { + state_space.SetAmpl(state, i, std::real(ampls[i]), std::imag(ampls[i])); + } + + auto result = state_space.Measure(qubits_to_measure, rgen, state); + ASSERT_TRUE(result.valid); + + ASSERT_NEAR(state_space.Norm(state), 1, 1e-6); + + uint64_t bin = 0; + for (std::size_t k = 0; k < qubits_to_measure.size(); ++k) { + bin |= uint64_t{result.bitstring[k]} << qubits_to_measure[k]; + } + + EXPECT_EQ(bin, result.bits); + + bins[bin] += 1; + } + + uint64_t mask = 0; + for (std::size_t k = 0; k < qubits_to_measure.size(); ++k) { + mask |= uint64_t{1} << qubits_to_measure[k]; + } + + std::vector expected_ps(size, 0); + + for (uint64_t i = 0; i < size; ++i) { + expected_ps[i & mask] += ps[i]; + } + + for (uint64_t i = 0; i < size; ++i) { + if (expected_ps[i] == 0) { + EXPECT_EQ(bins[i], 0); + } else { + auto p = bins[i] / num_measurements; + auto rel_error = (p - expected_ps[i]) / expected_ps[i]; + EXPECT_LE(rel_error, 0.02); + } + } +} + +template +void TestMeasurementSmall() { + using S = StateSpace; + + constexpr unsigned num_measurements = 200000; + + std::mt19937 rgen(1); + + std::vector ps1 = {0.37, 0.63}; + MeasureSmall(num_measurements, 1, 1, {0}, ps1, rgen); + + std::vector ps2 = {0.22, 0.42, 0.15, 0.21}; + MeasureSmall(num_measurements, 1, 2, {1}, ps2, rgen); + + std::vector ps3 = {0.1, 0.2, 0.13, 0.12, 0.18, 0.15, 0.07, 0.05}; + MeasureSmall(num_measurements, 1, 3, {2}, ps3, rgen); + MeasureSmall(num_measurements, 1, 3, {0, 1, 2}, ps3, rgen); + + std::vector ps5 = { + 0.041, 0.043, 0.028, 0.042, 0.002, 0.008, 0.039, 0.020, + 0.017, 0.030, 0.020, 0.048, 0.020, 0.044, 0.032, 0.048, + 0.025, 0.050, 0.030, 0.001, 0.039, 0.045, 0.005, 0.051, + 0.030, 0.039, 0.012, 0.049, 0.034, 0.029, 0.050, 0.029 + }; + MeasureSmall(num_measurements, 1, 5, {1, 3}, ps5, rgen); + MeasureSmall(num_measurements, 1, 5, {1, 2, 3, 4}, ps5, rgen); +} + +template +void TestMeasurementLarge() { + unsigned depth = 20; + + std::stringstream ss(circuit_string); + Circuit> circuit; + EXPECT_TRUE(CircuitQsimParser::FromStream(depth, provider, ss, circuit)); + + using StateSpace = typename Simulator::StateSpace; + using State = typename StateSpace::State; + using Runner = QSimRunner>, Simulator>; + + StateSpace state_space(circuit.num_qubits, 1); + State state = state_space.CreateState(); + + EXPECT_FALSE(state_space.IsNull(state)); + + state_space.SetStateZero(state); + + typename Runner::Parameter param; + param.seed = 1; + param.num_threads = 1; + param.verbosity = 0; + + EXPECT_TRUE(Runner::Run(param, depth, circuit, state)); + + std::mt19937 rgen(1); + auto result = state_space.Measure({0, 4}, rgen, state); + + EXPECT_TRUE(result.valid); + EXPECT_EQ(result.mask, 17); + EXPECT_NEAR(state_space.Norm(state), 1, 1e-6); + + auto ampl0 = state_space.GetAmpl(state, 0); + EXPECT_NEAR(std::real(ampl0), -0.00208748, 1e-6); + EXPECT_NEAR(std::imag(ampl0), -0.00153427, 1e-6); + + auto ampl2 = state_space.GetAmpl(state, 2); + EXPECT_NEAR(std::real(ampl2), -0.00076403, 1e-6); + EXPECT_NEAR(std::imag(ampl2), 0.00123912, 1e-6); + + auto ampl4 = state_space.GetAmpl(state, 4); + EXPECT_NEAR(std::real(ampl4), -0.00349379, 1e-6); + EXPECT_NEAR(std::imag(ampl4), 0.00110578, 1e-6); + + auto ampl6 = state_space.GetAmpl(state, 6); + EXPECT_NEAR(std::real(ampl6), -0.00180432, 1e-6); + EXPECT_NEAR(std::imag(ampl6), 0.00153727, 1e-6); + + for (uint64_t i = 0; i < 8; ++i) { + auto ampl = state_space.GetAmpl(state, 2 * i + 1); + EXPECT_EQ(std::real(ampl), 0); + EXPECT_EQ(std::imag(ampl), 0); + } + + for (uint64_t i = 16; i < 32; ++i) { + auto ampl = state_space.GetAmpl(state, 2 * i + 1); + EXPECT_EQ(std::real(ampl), 0); + EXPECT_EQ(std::imag(ampl), 0); + } +} + } // namespace qsim #endif // STATESPACE_TESTFIXTURE_H_ From 64d7c94fad7472c8d3ad14990036889278391042 Mon Sep 17 00:00:00 2001 From: Sergei Isakov Date: Wed, 1 Jul 2020 23:41:15 +0200 Subject: [PATCH 2/3] Add measurement gate. Fixes. --- apps/qsim_amplitudes.cc | 2 +- apps/qsim_base.cc | 2 +- apps/qsim_von_neumann.cc | 2 +- lib/fuser_basic.h | 6 +- lib/gate_appl.h | 6 +- lib/hybrid.h | 5 +- lib/parfor.h | 12 ++- lib/run_qsim.h | 6 ++ lib/run_qsimh.h | 7 ++ lib/seqfor.h | 11 +-- lib/statespace.h | 40 ++++++---- lib/statespace_avx.h | 115 +++++++++++------------------ lib/statespace_basic.h | 85 ++++++++------------- lib/statespace_sse.h | 123 ++++++++++++------------------- pybind_interface/pybind_main.cpp | 4 +- tests/fuser_basic_test.cc | 14 ++-- tests/hybrid_test.cc | 3 + tests/run_qsim_test.cc | 6 +- tests/statespace_testfixture.h | 6 +- 19 files changed, 199 insertions(+), 256 deletions(-) diff --git a/apps/qsim_amplitudes.cc b/apps/qsim_amplitudes.cc index 3d0221ee..ef8f6142 100644 --- a/apps/qsim_amplitudes.cc +++ b/apps/qsim_amplitudes.cc @@ -172,7 +172,7 @@ int main(int argc, char* argv[]) { } }; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; Runner::Parameter param; param.seed = opt.seed; diff --git a/apps/qsim_base.cc b/apps/qsim_base.cc index 444da62a..2f9c5599 100644 --- a/apps/qsim_base.cc +++ b/apps/qsim_base.cc @@ -112,7 +112,7 @@ int main(int argc, char* argv[]) { using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, opt.num_threads); State state = state_space.CreateState(); diff --git a/apps/qsim_von_neumann.cc b/apps/qsim_von_neumann.cc index cefec425..60a03499 100644 --- a/apps/qsim_von_neumann.cc +++ b/apps/qsim_von_neumann.cc @@ -113,7 +113,7 @@ int main(int argc, char* argv[]) { IO::messagef("entropy=%g\n", entropy); }; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; Runner::Parameter param; param.seed = opt.seed; diff --git a/lib/fuser_basic.h b/lib/fuser_basic.h index db4e4553..20c685d0 100644 --- a/lib/fuser_basic.h +++ b/lib/fuser_basic.h @@ -24,7 +24,7 @@ namespace qsim { -template +template struct BasicGateFuser final { using GateFused = qsim::GateFused; @@ -103,8 +103,8 @@ struct BasicGateFuser final { if (gate.time < prev_time) { // This function assumes that gate times are ordered. - // Just stop sielently if this is not the case. - // TODO: report an error here. + // Just stop silently if this is not the case. + IO::errorf("gate times should be ordered.\n"); gates_fused.resize(0); return gates_fused; } diff --git a/lib/gate_appl.h b/lib/gate_appl.h index f2410e12..7630cc65 100644 --- a/lib/gate_appl.h +++ b/lib/gate_appl.h @@ -62,7 +62,8 @@ inline void CalcMatrix4(unsigned q0, unsigned q1, } /** - * Applies the given gate to the simulator state. + * Applies the given gate to the simulator state. Measurement gates should not + * be applied in this function. * @param simulator Simulator object. Provides specific implementations for * applying one- and two-qubit gates. * @param gate The gate to be applied. @@ -85,7 +86,8 @@ inline void ApplyGate( } /** - * Applies the given fused gate to the simulator state. + * Applies the given fused gate to the simulator state. Measurement gates + * should not be applied in this function. * @param simulator Simulator object. Provides specific implementations for * applying one- and two-qubit gates. * @param gate The gate to be applied. diff --git a/lib/hybrid.h b/lib/hybrid.h index 1bd7fdf4..699a219f 100644 --- a/lib/hybrid.h +++ b/lib/hybrid.h @@ -26,7 +26,8 @@ namespace qsim { // Hybrid Feynmann-Schrodiner simulator. -template class FuserT, +template class FuserT, typename Simulator, typename For> struct HybridSimulator final { public: @@ -64,7 +65,7 @@ struct HybridSimulator final { }; public: - using Fuser = FuserT; + using Fuser = FuserT; using GateFused = typename Fuser::GateFused; struct HybridData { diff --git a/lib/parfor.h b/lib/parfor.h index cb62d0fa..99643a7e 100644 --- a/lib/parfor.h +++ b/lib/parfor.h @@ -18,16 +18,13 @@ #include #include +#include #include namespace qsim { template struct ParallelForT { - static bool CanBeParallel(uint64_t size) { - return size >= MIN_SIZE; - } - static uint64_t GetIndex0( uint64_t size, unsigned num_threads, unsigned thread_id) { return size >= MIN_SIZE ? size * thread_id / num_threads : 0; @@ -63,7 +60,7 @@ struct ParallelForT { template static std::vector RunReduceP( - unsigned num_threads, uint64_t size, Function&& func, const Op& op, + unsigned num_threads, uint64_t size, Function&& func, Op&& op, Args&&... args) { std::vector partial_results; @@ -101,8 +98,9 @@ struct ParallelForT { template static typename Op::result_type RunReduce(unsigned num_threads, uint64_t size, Function&& func, - const Op& op, Args&&... args) { - auto partial_results = RunReduceP(num_threads, size, func, op, args...); + Op&& op, Args&&... args) { + auto partial_results = RunReduceP( + num_threads, size, func, std::move(op), args...); typename Op::result_type result = 0; diff --git a/lib/run_qsim.h b/lib/run_qsim.h index db865952..a1ba340a 100644 --- a/lib/run_qsim.h +++ b/lib/run_qsim.h @@ -86,6 +86,9 @@ struct QSimRunner final { auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, times_to_measure_at); + if (fused_gates.size() == 0 && circuit.gates.size() > 0) { + return false; + } unsigned cur_time_index = 0; @@ -148,6 +151,9 @@ struct QSimRunner final { auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, maxtime); + if (fused_gates.size() == 0 && circuit.gates.size() > 0) { + return false; + } // Apply fused gates. for (std::size_t i = 0; i < fused_gates.size(); ++i) { diff --git a/lib/run_qsimh.h b/lib/run_qsimh.h index be7ca730..614ffd34 100644 --- a/lib/run_qsimh.h +++ b/lib/run_qsimh.h @@ -82,7 +82,14 @@ struct QSimHRunner final { } auto fgates0 = Fuser::FuseGates(hd.num_qubits0, hd.gates0, maxtime); + if (fgates0.size() == 0 && hd.gates0.size() > 0) { + return false; + } + auto fgates1 = Fuser::FuseGates(hd.num_qubits1, hd.gates1, maxtime); + if (fgates1.size() == 0 && hd.gates1.size() > 0) { + return false; + } rc = HybridSimulator::Run( param, hd, parts, fgates0, fgates1, bitstrings, results); diff --git a/lib/seqfor.h b/lib/seqfor.h index b0ab4c4b..1e4e9ef1 100644 --- a/lib/seqfor.h +++ b/lib/seqfor.h @@ -16,15 +16,12 @@ #define SEQFOR_H_ #include +#include #include namespace qsim { struct SequentialFor { - static bool CanBeParallel(uint64_t size) { - return false; - } - static uint64_t GetIndex0( uint64_t size, unsigned num_threads, unsigned thread_id) { return 0; @@ -45,7 +42,7 @@ struct SequentialFor { template static std::vector RunReduceP( - unsigned num_threads, uint64_t size, Function&& func, const Op& op, + unsigned num_threads, uint64_t size, Function&& func, Op&& op, Args&&... args) { typename Op::result_type result = 0; @@ -59,8 +56,8 @@ struct SequentialFor { template static typename Op::result_type RunReduce(unsigned num_threads, uint64_t size, Function&& func, - const Op& op, Args&&... args) { - return RunReduceP(num_threads, size, func, op, args...)[0]; + Op&& op, Args&&... args) { + return RunReduceP(num_threads, size, func, std::move(op), args...)[0]; } }; diff --git a/lib/statespace.h b/lib/statespace.h index 18abc659..462c2d67 100644 --- a/lib/statespace.h +++ b/lib/statespace.h @@ -35,7 +35,7 @@ inline void do_not_free(void*) noexcept {} } // namespace detail // Routines for state-vector manipulations. -template +template class StateSpace { public: using fp_type = FP; @@ -103,10 +103,8 @@ class StateSpace { For::Run(num_threads_, raw_size_, f, src, dest); } - protected: - template - double Norm(uint64_t size, Func&& PartialNorms, const State& state) const { - auto partial_norms = PartialNorms(num_threads_, size, state); + double Norm(const State& state) const { + auto partial_norms = static_cast(*this).PartialNorms(state); double norm = partial_norms[0]; for (std::size_t i = 1; i < partial_norms.size(); ++i) { @@ -116,10 +114,22 @@ class StateSpace { return norm; } - template - MeasurementResult VirtualMeasure( - const std::vector& qubits, RGen& rgen, const State& state, - uint64_t size, Func1&& PartialNorms, Func2&& FindMeasuredBits) const { + template + MeasurementResult Measure(const std::vector& qubits, + RGen& rgen, State& state) const { + auto result = + static_cast(*this).VirtualMeasure(qubits, rgen, state); + + if (result.valid) { + static_cast(*this).CollapseState(result, state); + } + + return result; + } + + template + MeasurementResult VirtualMeasure(const std::vector& qubits, + RGen& rgen, const State& state) const { MeasurementResult result; result.valid = true; @@ -134,7 +144,7 @@ class StateSpace { result.mask |= uint64_t{1} << q; } - auto partial_norms = PartialNorms(num_threads_, size, state); + auto partial_norms = static_cast(*this).PartialNorms(state); for (std::size_t i = 1; i < partial_norms.size(); ++i) { partial_norms[i] += partial_norms[i - 1]; @@ -145,12 +155,12 @@ class StateSpace { unsigned m = 0; while (r > partial_norms[m]) ++m; - r -= m > 0 ? partial_norms[m - 1] : 0; - - uint64_t k0 = For::GetIndex0(size, num_threads_, m); - uint64_t k1 = For::GetIndex1(size, num_threads_, m); + if (m > 0) { + r -= partial_norms[m - 1]; + } - result.bits = FindMeasuredBits(k0, k1, r, result.mask, state); + result.bits = static_cast(*this).FindMeasuredBits( + m, r, result.mask, state); result.bitstring.reserve(qubits.size()); result.bitstring.resize(0); diff --git a/lib/statespace_avx.h b/lib/statespace_avx.h index 2c71ff3a..dec779ad 100644 --- a/lib/statespace_avx.h +++ b/lib/statespace_avx.h @@ -45,7 +45,7 @@ inline __m256i GetZeroMaskAVX(uint64_t i, uint64_t mask, uint64_t bits) { return _mm256_blend_epi32(s1, s2, 170); // 10101010 } -inline double HorisontalSumAVX(__m256 s) { +inline double HorizontalSumAVX(__m256 s) { float buf[8]; _mm256_storeu_ps(buf, s); return buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7]; @@ -58,8 +58,8 @@ inline double HorisontalSumAVX(__m256 s) { // imaginary components. Eight single-precison floating numbers can be loaded // into an AVX register. template -struct StateSpaceAVX : public StateSpace { - using Base = StateSpace; +struct StateSpaceAVX : public StateSpace, For, float> { + using Base = StateSpace, For, float>; using State = typename Base::State; using fp_type = typename Base::fp_type; @@ -227,10 +227,6 @@ struct StateSpaceAVX : public StateSpace { state.get()[p + 8] = im; } - double Norm(const State& state) const { - return Base::Norm(Base::raw_size_ / 16, PartialNorms, state); - } - std::complex InnerProduct( const State& state1, const State& state2) const { using Op = std::plus>; @@ -245,8 +241,8 @@ struct StateSpaceAVX : public StateSpace { __m256 ip_re = _mm256_fmadd_ps(im1, im2, _mm256_mul_ps(re1, re2)); __m256 ip_im = _mm256_fnmadd_ps(im1, re2, _mm256_mul_ps(re1, im2)); - double re = detail::HorisontalSumAVX(ip_re); - double im = detail::HorisontalSumAVX(ip_im); + double re = detail::HorizontalSumAVX(ip_re); + double im = detail::HorizontalSumAVX(ip_im); return std::complex{re, im}; }; @@ -267,7 +263,7 @@ struct StateSpaceAVX : public StateSpace { __m256 ip_re = _mm256_fmadd_ps(im1, im2, _mm256_mul_ps(re1, re2)); - return detail::HorisontalSumAVX(ip_re); + return detail::HorizontalSumAVX(ip_re); }; return For::RunReduce( @@ -316,51 +312,64 @@ struct StateSpaceAVX : public StateSpace { using MeasurementResult = typename Base::MeasurementResult; - template - MeasurementResult Measure(const std::vector& qubits, - RGen& rgen, State& state) const { - auto result = VirtualMeasure(qubits, rgen, state); + void CollapseState(const MeasurementResult& mr, State& state) const { + auto f1 = [](unsigned n, unsigned m, uint64_t i, + const State& state, uint64_t mask, uint64_t bits) -> double { + __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); - if (result.valid) { - CollapseState(result, state); - } + __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); + __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); + __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); - return result; - } + return detail::HorizontalSumAVX(s1); + }; - template - MeasurementResult VirtualMeasure(const std::vector& qubits, - RGen& rgen, const State& state) const { - return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 16, - PartialNorms, FindMeasuredBits); - } + using Op = std::plus; + double norm = For::RunReduce(Base::num_threads_, Base::raw_size_ / 16, f1, + Op(), state, mr.mask, mr.bits); + + __m256 renorm = _mm256_set1_ps(1.0 / std::sqrt(norm)); + auto f2 = [](unsigned n, unsigned m, uint64_t i, + State& state, uint64_t mask, uint64_t bits, __m256 renorm) { + __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); + + __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); + __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); - void CollapseState(MeasurementResult& result, State& state) const { - CollapseState(Base::num_threads_, Base::raw_size_ / 16, result.mask, - result.bits, state); + re = _mm256_mul_ps(re, renorm); + im = _mm256_mul_ps(im, renorm); + + _mm256_store_ps(state.get() + 16 * i, re); + _mm256_store_ps(state.get() + 16 * i + 8, im); + }; + + For::Run(Base::num_threads_, Base::raw_size_ / 16, f2, + state, mr.mask, mr.bits, renorm); } - private: - static std::vector PartialNorms( - unsigned num_threads, uint64_t size, const State& state) { + std::vector PartialNorms(const State& state) const { auto f = [](unsigned n, unsigned m, uint64_t i, const State& state) -> double { __m256 re = _mm256_load_ps(state.get() + 16 * i); __m256 im = _mm256_load_ps(state.get() + 16 * i + 8); __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); - return detail::HorisontalSumAVX(s1); + return detail::HorizontalSumAVX(s1); }; using Op = std::plus; - return For::RunReduceP(num_threads, size, f, Op(), state); + return For::RunReduceP( + Base::num_threads_, Base::raw_size_ / 16, f, Op(), state); } - static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, - uint64_t mask, const State& state) { + uint64_t FindMeasuredBits( + unsigned m, double r, uint64_t mask, const State& state) const { double csum = 0; + uint64_t k0 = For::GetIndex0(Base::raw_size_ / 16, Base::num_threads_, m); + uint64_t k1 = For::GetIndex1(Base::raw_size_ / 16, Base::num_threads_, m); + for (uint64_t k = k0; k < k1; ++k) { for (uint64_t j = 0; j < 8; ++j) { auto re = state.get()[16 * k + j]; @@ -374,42 +383,6 @@ struct StateSpaceAVX : public StateSpace { return 0; } - - static void CollapseState(unsigned num_threads, uint64_t size, - uint64_t mask, uint64_t bits, State& state) { - auto f1 = [](unsigned n, unsigned m, uint64_t i, - const State& state, uint64_t mask, uint64_t bits) -> double { - __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); - - __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); - __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); - __m256 s1 = _mm256_fmadd_ps(im, im, _mm256_mul_ps(re, re)); - - return detail::HorisontalSumAVX(s1); - }; - - using Op = std::plus; - double norm = For::RunReduce( - num_threads, size, f1, Op(), state, mask, bits); - - __m256 renorm = _mm256_set1_ps(1.0 / std::sqrt(norm)); - - auto f2 = [](unsigned n, unsigned m, uint64_t i, - State& state, uint64_t mask, uint64_t bits, __m256 renorm) { - __m256i ml = detail::GetZeroMaskAVX(8 * i, mask, bits); - - __m256 re = _mm256_maskload_ps(state.get() + 16 * i, ml); - __m256 im = _mm256_maskload_ps(state.get() + 16 * i + 8, ml); - - re = _mm256_mul_ps(re, renorm); - im = _mm256_mul_ps(im, renorm); - - _mm256_store_ps(state.get() + 16 * i, re); - _mm256_store_ps(state.get() + 16 * i + 8, im); - }; - - For::Run(num_threads, size, f2, state, mask, bits, renorm); - } }; } // namespace qsim diff --git a/lib/statespace_basic.h b/lib/statespace_basic.h index 41c196a9..318627b2 100644 --- a/lib/statespace_basic.h +++ b/lib/statespace_basic.h @@ -29,8 +29,8 @@ namespace qsim { // State is a non-vectorized sequence of one real amplitude followed by // one imaginary amplitude. template -struct StateSpaceBasic : public StateSpace { - using Base = StateSpace; +struct StateSpaceBasic : public StateSpace, For, FP> { + using Base = StateSpace, For, FP>; using State = typename Base::State; using fp_type = typename Base::fp_type; @@ -87,10 +87,6 @@ struct StateSpaceBasic : public StateSpace { state.get()[p + 1] = im; } - double Norm(const State& state) const { - return Base::Norm(Base::raw_size_ / 2, PartialNorms, state); - } - std::complex InnerProduct( const State& state1, const State& state2) const { using Op = std::plus>; @@ -162,34 +158,33 @@ struct StateSpaceBasic : public StateSpace { using MeasurementResult = typename Base::MeasurementResult; - template - MeasurementResult Measure(const std::vector& qubits, - RGen& rgen, State& state) const { - auto result = VirtualMeasure(qubits, rgen, state); + void CollapseState(const MeasurementResult& mr, State& state) const { + auto f1 = [](unsigned n, unsigned m, uint64_t i, const State& state, + uint64_t mask, uint64_t bits) -> double { + auto s = state.get() + 2 * i; + return (i & mask) == bits ? s[0] * s[0] + s[1] * s[1] : 0; + }; - if (result.valid) { - CollapseState(result, state); - } + using Op = std::plus; + double norm = For::RunReduce(Base::num_threads_, Base::raw_size_ / 2, f1, + Op(), state, mr.mask, mr.bits); - return result; - } + double renorm = 1.0 / std::sqrt(norm); - template - MeasurementResult VirtualMeasure(const std::vector& qubits, - RGen& rgen, const State& state) const { - return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 2, - PartialNorms, FindMeasuredBits); - } + auto f2 = [](unsigned n, unsigned m, uint64_t i, State& state, + uint64_t mask, uint64_t bits, fp_type renorm) { + auto s = state.get() + 2 * i; + bool not_zero = (i & mask) == bits; + s[0] = not_zero ? s[0] * renorm : 0; + s[1] = not_zero ? s[1] * renorm : 0; + }; - void CollapseState(MeasurementResult& result, State& state) const { - CollapseState(Base::num_threads_, Base::raw_size_ / 2, result.mask, - result.bits, state); + For::Run(Base::num_threads_, Base::raw_size_ / 2, f2, + state, mr.mask, mr.bits, renorm); } - private: - static std::vector PartialNorms( - unsigned num_threads, uint64_t size, const State& state) { + std::vector PartialNorms(const State& state) const { auto f = [](unsigned n, unsigned m, uint64_t i, const State& state) -> double { auto s = state.get() + 2 * i; @@ -197,13 +192,17 @@ struct StateSpaceBasic : public StateSpace { }; using Op = std::plus; - return For::RunReduceP(num_threads, size, f, Op(), state); + return For::RunReduceP( + Base::num_threads_, Base::raw_size_ / 2, f, Op(), state); } - static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, - uint64_t mask, const State& state) { + uint64_t FindMeasuredBits( + unsigned m, double r, uint64_t mask, const State& state) const { double csum = 0; + uint64_t k0 = For::GetIndex0(Base::raw_size_ / 2, Base::num_threads_, m); + uint64_t k1 = For::GetIndex1(Base::raw_size_ / 2, Base::num_threads_, m); + for (uint64_t k = k0; k < k1; ++k) { auto re = state.get()[2 * k]; auto im = state.get()[2 * k + 1]; @@ -215,32 +214,6 @@ struct StateSpaceBasic : public StateSpace { return 0; } - - static void CollapseState(unsigned num_threads, uint64_t size, - uint64_t mask, uint64_t bits, State& state) { - auto f1 = [](unsigned n, unsigned m, uint64_t i, const State& state, - uint64_t mask, uint64_t bits) -> double { - auto s = state.get() + 2 * i; - return (i & mask) == bits ? s[0] * s[0] + s[1] * s[1] : 0; - }; - - using Op = std::plus; - double norm = For::RunReduce(num_threads, size, f1, Op(), - state, mask, bits); - - double renorm = 1.0 / std::sqrt(norm); - - auto f2 = [](unsigned n, unsigned m, uint64_t i, State& state, - uint64_t mask, uint64_t bits, fp_type renorm) { - auto s = state.get() + 2 * i; - bool not_zero = (i & mask) == bits; - - s[0] = not_zero ? s[0] * renorm : 0; - s[1] = not_zero ? s[1] * renorm : 0; - }; - - For::Run(num_threads, size, f2, state, mask, bits, renorm); - } }; } // namespace qsim diff --git a/lib/statespace_sse.h b/lib/statespace_sse.h index e6e25eb2..8b128834 100644 --- a/lib/statespace_sse.h +++ b/lib/statespace_sse.h @@ -45,7 +45,7 @@ inline __m128i GetZeroMaskSSE(uint64_t i, uint64_t mask, uint64_t bits) { return _mm_blend_epi16(s1, s2, 204); // 11001100 } -inline double HorisontalSumSSE(__m128 s) { +inline double HorizontalSumSSE(__m128 s) { float buf[4]; _mm_storeu_ps(buf, s); return buf[0] + buf[1] + buf[2] + buf[3]; @@ -58,8 +58,8 @@ inline double HorisontalSumSSE(__m128 s) { // imaginary components. Four single-precison floating numbers can be loaded // into an SSE register. template -struct StateSpaceSSE : public StateSpace { - using Base = StateSpace; +struct StateSpaceSSE : public StateSpace, For, float> { + using Base = StateSpace, For, float>; using State = typename Base::State; using fp_type = typename Base::fp_type; @@ -192,10 +192,6 @@ struct StateSpaceSSE : public StateSpace { state.get()[p + 4] = im; } - double Norm(const State& state) const { - return Base::Norm(Base::raw_size_ / 8, PartialNorms, state); - } - std::complex InnerProduct( const State& state1, const State& state2) const { using Op = std::plus>; @@ -210,8 +206,8 @@ struct StateSpaceSSE : public StateSpace { __m128 ip_re = _mm_add_ps(_mm_mul_ps(re1, re2), _mm_mul_ps(im1, im2)); __m128 ip_im = _mm_sub_ps(_mm_mul_ps(re1, im2), _mm_mul_ps(im1, re2)); - double re = detail::HorisontalSumSSE(ip_re); - double im = detail::HorisontalSumSSE(ip_im); + double re = detail::HorizontalSumSSE(ip_re); + double im = detail::HorizontalSumSSE(ip_im); return std::complex{re, im}; }; @@ -232,7 +228,7 @@ struct StateSpaceSSE : public StateSpace { __m128 ip_re = _mm_add_ps(_mm_mul_ps(re1, re2), _mm_mul_ps(im1, im2)); - return detail::HorisontalSumSSE(ip_re); + return detail::HorizontalSumSSE(ip_re); }; return For::RunReduce( @@ -281,67 +277,7 @@ struct StateSpaceSSE : public StateSpace { using MeasurementResult = typename Base::MeasurementResult; - template - MeasurementResult Measure(const std::vector& qubits, - RGen& rgen, State& state) const { - auto result = VirtualMeasure(qubits, rgen, state); - - if (result.valid) { - CollapseState(result, state); - } - - return result; - } - - template - MeasurementResult VirtualMeasure(const std::vector& qubits, - RGen& rgen, const State& state) const { - return Base::VirtualMeasure(qubits, rgen, state, Base::raw_size_ / 8, - PartialNorms, FindMeasuredBits); - } - - - void CollapseState(MeasurementResult& result, State& state) const { - CollapseState(Base::num_threads_, Base::raw_size_ / 8, result.mask, - result.bits, state); - } - - private: - static std::vector PartialNorms( - unsigned num_threads, uint64_t size, const State& state) { - auto f = [](unsigned n, unsigned m, uint64_t i, - const State& state) -> double { - __m128 re = _mm_load_ps(state.get() + 8 * i); - __m128 im = _mm_load_ps(state.get() + 8 * i + 4); - __m128 s1 = _mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im)); - - return detail::HorisontalSumSSE(s1); - }; - - using Op = std::plus; - return For::RunReduceP(num_threads, size, f, Op(), state); - } - - static uint64_t FindMeasuredBits(uint64_t k0, uint64_t k1, double r, - uint64_t mask, const State& state) { - double csum = 0; - - for (uint64_t k = k0; k < k1; ++k) { - for (uint64_t j = 0; j < 4; ++j) { - auto re = state.get()[8 * k + j]; - auto im = state.get()[8 * k + 4 + j]; - csum += re * re + im * im; - if (r < csum) { - return (4 * k + j) & mask; - } - } - } - - return 0; - } - - static void CollapseState(unsigned num_threads, uint64_t size, - uint64_t mask, uint64_t bits, State& state) { + void CollapseState(const MeasurementResult& mr, State& state) const { __m128 zero = _mm_set1_ps(0); auto f1 = [](unsigned n, unsigned m, uint64_t i, const State& state, @@ -354,12 +290,12 @@ struct StateSpaceSSE : public StateSpace { s1 = _mm_blendv_ps(zero, s1, ml); - return detail::HorisontalSumSSE(s1); + return detail::HorizontalSumSSE(s1); }; using Op = std::plus; - double norm = For::RunReduce(num_threads, size, f1, Op(), - state, mask, bits, zero); + double norm = For::RunReduce(Base::num_threads_, Base::raw_size_ / 8, f1, + Op(), state, mr.mask, mr.bits, zero); __m128 renorm = _mm_set1_ps(1.0 / std::sqrt(norm)); @@ -377,7 +313,44 @@ struct StateSpaceSSE : public StateSpace { _mm_store_ps(state.get() + 8 * i + 4, im); }; - For::Run(num_threads, size, f2, state, mask, bits, renorm, zero); + For::Run(Base::num_threads_, Base::raw_size_ / 8, f2, + state, mr.mask, mr.bits, renorm, zero); + } + + std::vector PartialNorms(const State& state) const { + auto f = [](unsigned n, unsigned m, uint64_t i, + const State& state) -> double { + __m128 re = _mm_load_ps(state.get() + 8 * i); + __m128 im = _mm_load_ps(state.get() + 8 * i + 4); + __m128 s1 = _mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im)); + + return detail::HorizontalSumSSE(s1); + }; + + using Op = std::plus; + return For::RunReduceP( + Base::num_threads_, Base::raw_size_ / 8, f, Op(), state); + } + + uint64_t FindMeasuredBits( + unsigned m, double r, uint64_t mask, const State& state) const { + double csum = 0; + + uint64_t k0 = For::GetIndex0(Base::raw_size_ / 8, Base::num_threads_, m); + uint64_t k1 = For::GetIndex1(Base::raw_size_ / 8, Base::num_threads_, m); + + for (uint64_t k = k0; k < k1; ++k) { + for (uint64_t j = 0; j < 4; ++j) { + auto re = state.get()[8 * k + j]; + auto im = state.get()[8 * k + 4 + j]; + csum += re * re + im * im; + if (r < csum) { + return (4 * k + j) & mask; + } + } + } + + return 0; } }; diff --git a/pybind_interface/pybind_main.cpp b/pybind_interface/pybind_main.cpp index a57062f7..04809d91 100644 --- a/pybind_interface/pybind_main.cpp +++ b/pybind_interface/pybind_main.cpp @@ -288,7 +288,7 @@ std::vector> qsim_simulate(const py::dict &options) { } }; - using Runner = QSimRunner>, + using Runner = QSimRunner>, Simulator>; Runner::Parameter param; @@ -323,7 +323,7 @@ py::array_t qsim_simulate_fullstate(const py::dict &options) { using Simulator = qsim::Simulator; using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - using Runner = QSimRunner>, + using Runner = QSimRunner>, Simulator>; Runner::Parameter param; diff --git a/tests/fuser_basic_test.cc b/tests/fuser_basic_test.cc index f2732e39..ae6fbecd 100644 --- a/tests/fuser_basic_test.cc +++ b/tests/fuser_basic_test.cc @@ -65,7 +65,7 @@ TEST(FuserBasicTest, NoTimesToSplitAt) { EXPECT_EQ(circuit.num_qubits, 4); EXPECT_EQ(circuit.gates.size(), 27); - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, 99); EXPECT_EQ(fused_gates.size(), 5); @@ -232,7 +232,7 @@ TEST(FuserBasicTest, TimesToSplitAt1) { std::vector times_to_split_at{3, 8, 10}; - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates( circuit.num_qubits, circuit.gates, times_to_split_at); @@ -407,7 +407,7 @@ TEST(FuserBasicTest, TimesToSplitAt2) { std::vector times_to_split_at{2, 10}; - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates( circuit.num_qubits, circuit.gates, times_to_split_at); @@ -585,7 +585,7 @@ TEST(FuserBasicTest, OrphanedQubits1) { EXPECT_EQ(circuit.num_qubits, 3); EXPECT_EQ(circuit.gates.size(), 9); - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, 2); EXPECT_EQ(fused_gates.size(), 2); @@ -643,7 +643,7 @@ TEST(FuserBasicTest, OrphanedQubits2) { std::vector times_to_split_at{1, 4}; - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates( circuit.num_qubits, circuit.gates, times_to_split_at); @@ -725,7 +725,7 @@ TEST(FuserBasicTest, UnfusibleSingleQubitGate) { circuit.gates[1].unfusible = true; circuit.gates[2].unfusible = true; - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, 2); EXPECT_EQ(fused_gates.size(), 3); @@ -807,7 +807,7 @@ TEST(FuserBasicTest, MeasurementGate) { EXPECT_EQ(circuit.num_qubits, 4); EXPECT_EQ(circuit.gates.size(), 17); - using Fuser = BasicGateFuser>; + using Fuser = BasicGateFuser>; auto fused_gates = Fuser::FuseGates(circuit.num_qubits, circuit.gates, 6); EXPECT_EQ(fused_gates.size(), 11); diff --git a/tests/hybrid_test.cc b/tests/hybrid_test.cc index 1cc55860..ef4aae4c 100644 --- a/tests/hybrid_test.cc +++ b/tests/hybrid_test.cc @@ -266,6 +266,9 @@ R"(4 auto fgates0 = Fuser::FuseGates(hd.num_qubits0, hd.gates0, 99); auto fgates1 = Fuser::FuseGates(hd.num_qubits1, hd.gates1, 99); + EXPECT_EQ(fgates0.size(), 10); + EXPECT_EQ(fgates1.size(), 10); + HybridSimulator::Parameter param; param.prefix = 1; param.num_prefix_gatexs = 2; diff --git a/tests/run_qsim_test.cc b/tests/run_qsim_test.cc index 8b4ba6f7..124c253a 100644 --- a/tests/run_qsim_test.cc +++ b/tests/run_qsim_test.cc @@ -75,7 +75,7 @@ TEST(RunQSimTest, QSimRunner1) { using Simulator = Simulator; using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; float entropy = 0; @@ -113,7 +113,7 @@ TEST(RunQSimTest, QSimRunner2) { using Simulator = Simulator; using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, 1); State state = state_space.CreateState(); @@ -149,7 +149,7 @@ TEST(RunQSimTest, CirqGates) { using Simulator = Simulator; using StateSpace = Simulator::StateSpace; using State = StateSpace::State; - using Runner = QSimRunner>, + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, 1); diff --git a/tests/statespace_testfixture.h b/tests/statespace_testfixture.h index dc9d6018..8faf1639 100644 --- a/tests/statespace_testfixture.h +++ b/tests/statespace_testfixture.h @@ -444,7 +444,7 @@ void TestNormAndInnerProduct() { using StateSpace = typename Simulator::StateSpace; using State = typename StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, 1); State state0 = state_space.CreateState(); @@ -523,7 +523,7 @@ void TestSamplingCrossEntropyDifference() { using StateSpace = typename Simulator::StateSpace; using State = typename StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, 1); State state = state_space.CreateState(); @@ -699,7 +699,7 @@ void TestMeasurementLarge() { using StateSpace = typename Simulator::StateSpace; using State = typename StateSpace::State; - using Runner = QSimRunner>, Simulator>; + using Runner = QSimRunner>, Simulator>; StateSpace state_space(circuit.num_qubits, 1); State state = state_space.CreateState(); From 0244439006529b11cac7b9a3d0156087ae1657ea Mon Sep 17 00:00:00 2001 From: Sergei Isakov Date: Mon, 6 Jul 2020 19:40:15 +0200 Subject: [PATCH 3/3] Add measurement gate. Comment. --- lib/parfor.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/parfor.h b/lib/parfor.h index 99643a7e..a135fa4c 100644 --- a/lib/parfor.h +++ b/lib/parfor.h @@ -25,6 +25,9 @@ namespace qsim { template struct ParallelForT { + // GetIndex0 and GetIndex1 are useful when we need to know how work was + // divided between threads, for instance, for reusing partial sums obtained + // by RunReduceP. static uint64_t GetIndex0( uint64_t size, unsigned num_threads, unsigned thread_id) { return size >= MIN_SIZE ? size * thread_id / num_threads : 0;