Skip to content

Commit

Permalink
Merge pull request #143 from quantumlib/stateful-for
Browse files: browse the repository at this point in the history
Convert to stateful for.
  • Loading branch information
sergeisakov authored Jul 7, 2020
2 parents 8ad553c + 6a8c82d commit 5b6e26a
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 139 deletions.
4 changes: 2 additions & 2 deletions apps/qsim_von_neumann.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ int main(int argc, char* argv[]) {
return p != 0 ? p * std::log(p) : 0;
};

double entropy = -For::RunReduce(opt.num_threads, state_space.Size(), f,
Op(), state_space, state);
double entropy = -For(opt.num_threads).RunReduce(state_space.Size(), f,
Op(), state_space, state);
IO::messagef("entropy=%g\n", entropy);
};

Expand Down
21 changes: 13 additions & 8 deletions lib/hybrid.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ struct HybridSimulator final {
unsigned verbosity = 0;
};

template <typename... Args>
explicit HybridSimulator(Args&&... args) : for_(args...) {}

/**
* Splits the lattice into two parts, using Schmidt decomposition for gates
* on the cut.
Expand Down Expand Up @@ -242,12 +245,12 @@ struct HybridSimulator final {
* will be populated with amplitudes for each state in 'bitstrings'.
* @return True if the simulation completed successfully; false otherwise.
*/
static bool Run(const Parameter& param, HybridData& hd,
const std::vector<unsigned>& parts,
const std::vector<GateFused>& fgates0,
const std::vector<GateFused>& fgates1,
const std::vector<uint64_t>& bitstrings,
std::vector<std::complex<fp_type>>& results) {
bool Run(const Parameter& param, HybridData& hd,
const std::vector<unsigned>& parts,
const std::vector<GateFused>& fgates0,
const std::vector<GateFused>& fgates1,
const std::vector<uint64_t>& bitstrings,
std::vector<std::complex<fp_type>>& results) const {
unsigned num_p_gates = param.num_prefix_gatexs;
unsigned num_pr_gates = num_p_gates + param.num_root_gatexs;

Expand Down Expand Up @@ -374,8 +377,8 @@ struct HybridSimulator final {
};

// Collect results.
For::Run(param.num_threads, results.size(), f, sspace0, sspace1,
*rstate0, *rstate1, indices, results);
for_.Run(results.size(), f, sspace0, sspace1, *rstate0, *rstate1,
indices, results);
}
}

Expand Down Expand Up @@ -541,6 +544,8 @@ struct HybridSimulator final {

return true;
}

For for_;
};

} // namespace qsim
Expand Down
34 changes: 16 additions & 18 deletions lib/parfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,29 @@ namespace qsim {

template <uint64_t MIN_SIZE>
struct ParallelForT {
explicit ParallelForT(unsigned num_threads) : num_threads(num_threads) {}

// GetIndex0 and GetIndex1 are useful when we need to know how work was
// divided between threads, for instance, for reusing partial sums obtained
// by RunReduceP.
static uint64_t GetIndex0(
uint64_t size, unsigned num_threads, unsigned thread_id) {
uint64_t GetIndex0(uint64_t size, unsigned thread_id) const {
return size >= MIN_SIZE ? size * thread_id / num_threads : 0;
}

static uint64_t GetIndex1(
uint64_t size, unsigned num_threads, unsigned thread_id) {
uint64_t GetIndex1(uint64_t size, unsigned thread_id) const {
return size >= MIN_SIZE ? size * (thread_id + 1) / num_threads : size;
}

template <typename Function, typename... Args>
static void Run(
unsigned num_threads, uint64_t size, Function&& func, Args&&... args) {
void Run(uint64_t size, Function&& func, Args&&... args) const {
if (num_threads > 1 && size >= MIN_SIZE) {
#pragma omp parallel num_threads(num_threads)
{
unsigned n = omp_get_num_threads();
unsigned m = omp_get_thread_num();

uint64_t i0 = GetIndex0(size, n, m);
uint64_t i1 = GetIndex1(size, n, m);
uint64_t i0 = GetIndex0(size, m);
uint64_t i1 = GetIndex1(size, m);

for (uint64_t i = i0; i < i1; ++i) {
func(n, m, i, args...);
Expand All @@ -62,9 +61,8 @@ struct ParallelForT {
}

template <typename Function, typename Op, typename... Args>
static std::vector<typename Op::result_type> RunReduceP(
unsigned num_threads, uint64_t size, Function&& func, Op&& op,
Args&&... args) {
std::vector<typename Op::result_type> RunReduceP(
uint64_t size, Function&& func, Op&& op, Args&&... args) const {
std::vector<typename Op::result_type> partial_results;

if (num_threads > 1 && size >= MIN_SIZE) {
Expand All @@ -75,8 +73,8 @@ struct ParallelForT {
unsigned n = omp_get_num_threads();
unsigned m = omp_get_thread_num();

uint64_t i0 = GetIndex0(size, n, m);
uint64_t i1 = GetIndex1(size, n, m);
uint64_t i0 = GetIndex0(size, m);
uint64_t i1 = GetIndex1(size, m);

typename Op::result_type partial_result = 0;

Expand All @@ -99,11 +97,9 @@ struct ParallelForT {
}

template <typename Function, typename Op, typename... Args>
static typename Op::result_type RunReduce(unsigned num_threads,
uint64_t size, Function&& func,
Op&& op, Args&&... args) {
auto partial_results = RunReduceP(
num_threads, size, func, std::move(op), args...);
typename Op::result_type RunReduce(uint64_t size, Function&& func,
Op&& op, Args&&... args) const {
auto partial_results = RunReduceP(size, func, std::move(op), args...);

typename Op::result_type result = 0;

Expand All @@ -113,6 +109,8 @@ struct ParallelForT {

return result;
}

unsigned num_threads;
};

using ParallelFor = ParallelForT<1024>;
Expand Down
3 changes: 1 addition & 2 deletions lib/run_qsimh.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ struct QSimHRunner final {
return false;
}

rc = HybridSimulator::Run(
rc = HybridSimulator(param.num_threads).Run(
param, hd, parts, fgates0, fgates1, bitstrings, results);

if (rc && param.verbosity > 0) {
Expand All @@ -112,7 +112,6 @@ struct QSimHRunner final {
IO::messagef("breakup: %up+%ur+%us\n", param.num_prefix_gatexs,
param.num_root_gatexs, num_suffix_gates);
}

};

} // namespace qsim
Expand Down
21 changes: 10 additions & 11 deletions lib/seqfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,28 @@
namespace qsim {

struct SequentialFor {
static uint64_t GetIndex0(
uint64_t size, unsigned num_threads, unsigned thread_id) {
explicit SequentialFor(unsigned num_threads) {}

// SequentialFor does not have any state. So all its methods can be static.

static uint64_t GetIndex0(uint64_t size, unsigned thread_id) {
return 0;
}

static uint64_t GetIndex1(
uint64_t size, unsigned num_threads, unsigned thread_id) {
static uint64_t GetIndex1(uint64_t size, unsigned thread_id) {
return size;
}

template <typename Function, typename... Args>
static void Run(
unsigned num_threads, uint64_t size, Function&& func, Args&&... args) {
static void Run(uint64_t size, Function&& func, Args&&... args) {
for (uint64_t i = 0; i < size; ++i) {
func(1, 0, i, args...);
}
}

template <typename Function, typename Op, typename... Args>
static std::vector<typename Op::result_type> RunReduceP(
unsigned num_threads, uint64_t size, Function&& func, Op&& op,
Args&&... args) {
uint64_t size, Function&& func, Op&& op, Args&&... args) {
typename Op::result_type result = 0;

for (uint64_t i = 0; i < size; ++i) {
Expand All @@ -54,10 +54,9 @@ struct SequentialFor {
}

template <typename Function, typename Op, typename... Args>
static typename Op::result_type RunReduce(unsigned num_threads,
uint64_t size, Function&& func,
static typename Op::result_type RunReduce(uint64_t size, Function&& func,
Op&& op, Args&&... args) {
return RunReduceP(num_threads, size, func, std::move(op), args...)[0];
return RunReduceP(size, func, std::move(op), args...)[0];
}
};

Expand Down
22 changes: 10 additions & 12 deletions lib/simulator_avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ class SimulatorAVX final {
using State = typename StateSpace::State;
using fp_type = typename StateSpace::fp_type;

SimulatorAVX(unsigned num_qubits, unsigned num_threads)
: num_qubits_(num_qubits), num_threads_(num_threads) {}
template <typename... Args>
explicit SimulatorAVX(unsigned num_qubits, Args&&... args)
: for_(args...), num_qubits_(num_qubits) {}

/**
* Applies a single-qubit gate using AVX instructions.
Expand Down Expand Up @@ -130,7 +131,7 @@ class SimulatorAVX final {
_mm256_store_ps(rstate + p + 8, in);
};

For::Run(num_threads_, sizei / 16, f, sizek, mask0, mask1, matrix, rstate);
for_.Run(sizei / 16, f, sizek, mask0, mask1, matrix, rstate);
}

// Applies a single-qubit gate for qubit <= 2.
Expand Down Expand Up @@ -223,8 +224,7 @@ class SimulatorAVX final {
_mm256_store_ps(rstate + p + 8, in);
};

For::Run(num_threads_, std::max(uint64_t{1}, sizei / 16), f, q0, ml,
matrix, rstate);
for_.Run(std::max(uint64_t{1}, sizei / 16), f, q0, ml, matrix, rstate);
}

// Applies two-qubit gate for qubit0 > 2 and qubit1 > 2.
Expand Down Expand Up @@ -376,8 +376,7 @@ class SimulatorAVX final {
_mm256_store_ps(rstate + p + 8, in);
};

For::Run(num_threads_, sizei / 16, f, sizej, sizek, mask0, mask1, mask2,
matrix, rstate);
for_.Run(sizei / 16, f, sizej, sizek, mask0, mask1, mask2, matrix, rstate);
}

// Applies a two-qubit gate for qubit0 <= 2 and qubit1 > 2.
Expand Down Expand Up @@ -586,8 +585,7 @@ class SimulatorAVX final {
_mm256_store_ps(rstate + p + 8, in);
};

For::Run(num_threads_, sizei / 16, f, sizej, mask0, mask1, q0, ml,
matrix, rstate);
for_.Run(sizei / 16, f, sizej, mask0, mask1, q0, ml, matrix, rstate);
}

// Applies a two-qubit gate for qubit0 <= 2 and qubit1 <= 2.
Expand Down Expand Up @@ -814,12 +812,12 @@ class SimulatorAVX final {
_mm256_store_ps(rstate + p + 8, in);
};

For::Run(num_threads_, std::max(uint64_t{1}, sizei / 16), f, q,
ml1, ml2, ml3, matrix, rstate);
for_.Run(
std::max(uint64_t{1}, sizei / 16), f, q, ml1, ml2, ml3, matrix, rstate);
}

For for_;
unsigned num_qubits_;
unsigned num_threads_;
};

} // namespace qsim
Expand Down
12 changes: 6 additions & 6 deletions lib/simulator_basic.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ class SimulatorBasic final {
using State = typename StateSpace::State;
using fp_type = typename StateSpace::fp_type;

SimulatorBasic(unsigned num_qubits, unsigned num_threads)
: num_qubits_(num_qubits), num_threads_(num_threads) {}
template <typename... Args>
explicit SimulatorBasic(unsigned num_qubits, Args&&... args)
: for_(args...), num_qubits_(num_qubits) {}

/**
* Applies a single-qubit gate using sparse matrix-vector multiplication.
Expand Down Expand Up @@ -66,7 +67,7 @@ class SimulatorBasic final {
rstate[si1 + 1] = s0r * u[5] + s0i * u[4] + s1r * u[7] + s1i * u[6];
};

For::Run(num_threads_, sizei / 2, f, sizek, mask0, mask1, matrix, rstate);
for_.Run(sizei / 2, f, sizek, mask0, mask1, matrix, rstate);
}

/**
Expand Down Expand Up @@ -129,13 +130,12 @@ class SimulatorBasic final {
+ s2r * u[29] + s2i * u[28] + s3r * u[31] + s3i * u[30];
};

For::Run(num_threads_, sizei / 2, f, sizej, sizek, mask0, mask1, mask2,
matrix, rstate);
for_.Run(sizei / 2, f, sizej, sizek, mask0, mask1, mask2, matrix, rstate);
}

private:
For for_;
unsigned num_qubits_;
unsigned num_threads_;
};

} // namespace qsim
Expand Down
20 changes: 9 additions & 11 deletions lib/simulator_sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ class SimulatorSSE final {
using State = typename StateSpace::State;
using fp_type = typename StateSpace::fp_type;

SimulatorSSE(unsigned num_qubits, unsigned num_threads)
: num_qubits_(num_qubits), num_threads_(num_threads) {}
template <typename... Args>
explicit SimulatorSSE(unsigned num_qubits, Args&&... args)
: for_(args...), num_qubits_(num_qubits) {}

/**
* Applies a single-qubit gate using SSE instructions.
Expand Down Expand Up @@ -131,7 +132,7 @@ class SimulatorSSE final {
_mm_store_ps(rstate + p + 4, in);
};

For::Run(num_threads_, sizei / 8, f, sizek, mask0, mask1, matrix, rstate);
for_.Run(sizei / 8, f, sizek, mask0, mask1, matrix, rstate);
}

// Applies a single-qubit gate for qubit <= 1.
Expand Down Expand Up @@ -212,8 +213,7 @@ class SimulatorSSE final {
_mm_store_ps(rstate + p + 4, in);
};

For::Run(num_threads_, std::max(uint64_t{1}, sizei / 8), f, q0,
matrix, rstate);
for_.Run(std::max(uint64_t{1}, sizei / 8), f, q0, matrix, rstate);
}

// Applies two-qubit gate for qubit0 > 1 and qubit1 > 1.
Expand Down Expand Up @@ -365,8 +365,7 @@ class SimulatorSSE final {
_mm_store_ps(rstate + p + 4, in);
};

For::Run(num_threads_, sizei / 8, f, sizej, sizek, mask0, mask1, mask2,
matrix, rstate);
for_.Run(sizei / 8, f, sizej, sizek, mask0, mask1, mask2, matrix, rstate);
}

// Applies a two-qubit gate for qubit0 <= 1 and qubit1 > 1.
Expand Down Expand Up @@ -557,8 +556,7 @@ class SimulatorSSE final {
_mm_store_ps(rstate + p + 4, in);
};

For::Run(num_threads_, sizei / 8, f, sizej, mask0, mask1, q0,
matrix, rstate);
for_.Run(sizei / 8, f, sizej, mask0, mask1, q0, matrix, rstate);
}

// Applies a two-qubit gate for qubit0 = 0 and qubit1 = 1.
Expand Down Expand Up @@ -601,11 +599,11 @@ class SimulatorSSE final {
+ s2r * u[29] + s2i * u[28] + s3r * u[31] + s3i * u[30];
};

For::Run(num_threads_, sizei / 8, f, matrix, rstate);
for_.Run(sizei / 8, f, matrix, rstate);
}

For for_;
unsigned num_qubits_;
unsigned num_threads_;
};

} // namespace qsim
Expand Down
Loading

0 comments on commit 5b6e26a

Please sign in to comment.