diff --git a/.github/workflows/bazeltest.yml b/.github/workflows/bazeltest.yml
index 9b29cda3..a7fb03f1 100644
--- a/.github/workflows/bazeltest.yml
+++ b/.github/workflows/bazeltest.yml
@@ -26,8 +26,8 @@ jobs:
run: git submodule update --init --recursive
- name: Install Bazel on CI
run: |
- wget https://github.com/bazelbuild/bazel/releases/download/0.26.0/bazel_0.26.0-linux-x86_64.deb
- sudo dpkg -i bazel_0.26.0-linux-x86_64.deb
+ wget https://github.com/bazelbuild/bazel/releases/download/3.7.2/bazel_3.7.2-linux-x86_64.deb
+ sudo dpkg -i bazel_3.7.2-linux-x86_64.deb
- name: Run C++ tests
run: |
bazel test --config=${{ matrix.hardware_opt }} \
@@ -52,8 +52,8 @@ jobs:
run: git submodule update --init --recursive
- name: Install Bazel on CI
run: |
- wget https://github.com/bazelbuild/bazel/releases/download/0.26.0/bazel_0.26.0-linux-x86_64.deb
- sudo dpkg -i bazel_0.26.0-linux-x86_64.deb
+ wget https://github.com/bazelbuild/bazel/releases/download/3.7.2/bazel_3.7.2-linux-x86_64.deb
+ sudo dpkg -i bazel_3.7.2-linux-x86_64.deb
- name: Run C++ tests
run: |
bazel test --config=avx --config=openmp \
@@ -69,8 +69,8 @@ jobs:
run: git submodule update --init --recursive
- name: Install Bazel on CI
run: |
- wget https://github.com/bazelbuild/bazel/releases/download/0.26.0/bazel_0.26.0-linux-x86_64.deb
- sudo dpkg -i bazel_0.26.0-linux-x86_64.deb
+ wget https://github.com/bazelbuild/bazel/releases/download/3.7.2/bazel_3.7.2-linux-x86_64.deb
+ sudo dpkg -i bazel_3.7.2-linux-x86_64.deb
- name: Install google-perftools for tcmalloc
run: sudo apt-get install libgoogle-perftools-dev
- name: Run C++ tests
diff --git a/.github/workflows/cirq_compatibility.yml b/.github/workflows/cirq_compatibility.yml
index f9a9df71..f4fd7473 100644
--- a/.github/workflows/cirq_compatibility.yml
+++ b/.github/workflows/cirq_compatibility.yml
@@ -7,7 +7,7 @@ on:
jobs:
consistency:
name: Nightly Compatibility
- runs-on: ubuntu-16.04
+ runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
@@ -18,5 +18,7 @@ jobs:
run: pip3 install -U cirq --pre
- name: Install qsim requirements
run: pip3 install -r requirements.txt
+ - name: Install test requirements
+ run: pip3 install -r dev-requirements.txt
- name: Run python tests
run: make run-py-tests
diff --git a/.github/workflows/python_format.yml b/.github/workflows/python_format.yml
index adb5f56f..213abf2e 100644
--- a/.github/workflows/python_format.yml
+++ b/.github/workflows/python_format.yml
@@ -24,9 +24,7 @@ jobs:
with:
python-version: '3.7'
architecture: 'x64'
- - name: Install flynt
- run: cat requirements.txt | grep flynt | xargs pip install
- - name: Install black
- run: cat requirements.txt | grep black | xargs pip install
+ - name: Install dev requirements
+ run: pip install -r dev-requirements.txt
- name: Format
run: check/format-incremental
diff --git a/.github/workflows/release_wheels.yml b/.github/workflows/release_wheels.yml
index 4643ac1e..b209d74e 100644
--- a/.github/workflows/release_wheels.yml
+++ b/.github/workflows/release_wheels.yml
@@ -26,7 +26,7 @@ jobs:
name: win_amd64
architecture: x64
cibw:
- build: "cp*win_amd64"
+ build: "cp36-win_amd64 cp37-win_amd64 cp38-win_amd64 cp39-win_amd64"
env:
CIBW_BUILD: "${{ matrix.cibw.build || '*' }}"
CIBW_ARCHS: "${{ matrix.cibw.arch || 'auto' }}"
@@ -36,7 +36,8 @@ jobs:
# to install latest delocate package
CIBW_DEPENDENCY_VERSIONS: "latest"
# due to package and module name conflict have to temporarily move it away to run tests
- CIBW_BEFORE_TEST: "mv {package}/qsimcirq /tmp"
+ CIBW_BEFORE_TEST: mv {package}/qsimcirq /tmp
+ CIBW_TEST_EXTRAS: "dev"
CIBW_TEST_COMMAND: "pytest {package}/qsimcirq_tests/qsimcirq_test.py && mv /tmp/qsimcirq {package}"
steps:
- uses: actions/checkout@v2
diff --git a/.github/workflows/testing_wheels.yml b/.github/workflows/testing_wheels.yml
index 21e6c41f..fa54cf51 100644
--- a/.github/workflows/testing_wheels.yml
+++ b/.github/workflows/testing_wheels.yml
@@ -42,6 +42,7 @@ jobs:
CIBW_DEPENDENCY_VERSIONS: "latest"
# due to package and module name conflict have to temporarily move it away to run tests
CIBW_BEFORE_TEST: "mv {package}/qsimcirq /tmp"
+ CIBW_TEST_EXTRAS: "dev"
CIBW_TEST_COMMAND: "pytest {package}/qsimcirq_tests/qsimcirq_test.py && mv /tmp/qsimcirq {package}"
steps:
- uses: actions/checkout@v2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d631366c..fba9db45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,15 @@
cmake_minimum_required(VERSION 3.11)
-project(qsim)
+
+execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc)
+if(has_nvcc STREQUAL "")
+ project(qsim)
+else()
+ project(qsim LANGUAGES CXX CUDA)
+ ADD_SUBDIRECTORY(pybind_interface/cuda)
+ if(DEFINED ENV{CUQUANTUM_DIR})
+ ADD_SUBDIRECTORY(pybind_interface/custatevec)
+ endif()
+endif()
ADD_SUBDIRECTORY(pybind_interface/sse)
ADD_SUBDIRECTORY(pybind_interface/avx512)
diff --git a/MANIFEST.in b/MANIFEST.in
index 2968589b..4b487267 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
include requirements.txt
+include dev-requirements.txt
include CMakeLists.txt
graft pybind_interface
diff --git a/Makefile b/Makefile
index 49d1370b..7cdaa414 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,9 @@ CXXFLAGS = -O3 -fopenmp
ARCHFLAGS = -march=native
NVCCFLAGS = -O3
+# CUQUANTUM_DIR should be set.
+CUSTATEVECFLAGS = -I$(CUQUANTUM_DIR)/include -L${CUQUANTUM_DIR}/lib -L$(CUQUANTUM_DIR)/lib64 -lcustatevec -lcublas
+
PYBIND11 = true
export CXX
@@ -18,6 +21,7 @@ export CXXFLAGS
export ARCHFLAGS
export NVCC
export NVCCFLAGS
+export CUSTATEVECFLAGS
ifeq ($(PYBIND11), true)
TARGETS += pybind
@@ -35,6 +39,10 @@ qsim:
qsim-cuda:
$(MAKE) -C apps/ qsim-cuda
+.PHONY: qsim-custatevec
+qsim-custatevec:
+ $(MAKE) -C apps/ qsim-custatevec
+
.PHONY: pybind
pybind:
$(MAKE) -C pybind_interface/ pybind
@@ -47,6 +55,10 @@ cxx-tests: eigen
cuda-tests:
$(MAKE) -C tests/ cuda-tests
+.PHONY: custatevec-tests
+custatevec-tests:
+ $(MAKE) -C tests/ custatevec-tests
+
.PHONY: run-cxx-tests
run-cxx-tests: cxx-tests
$(MAKE) -C tests/ run-cxx-tests
@@ -55,6 +67,10 @@ run-cxx-tests: cxx-tests
run-cuda-tests: cuda-tests
$(MAKE) -C tests/ run-cuda-tests
+.PHONY: run-custatevec-tests
+run-custatevec-tests: custatevec-tests
+ $(MAKE) -C tests/ run-custatevec-tests
+
PYTESTS = $(shell find qsimcirq_tests/ -name '*_test.py')
.PHONY: run-py-tests
diff --git a/README.md b/README.md
index b5e5f6f9..767e4c2c 100644
--- a/README.md
+++ b/README.md
@@ -85,12 +85,14 @@ located in [tests](https://github.com/quantumlib/qsim/tree/master/tests).
Python tests use pytest, and are located in
[qsimcirq_tests](https://github.com/quantumlib/qsim/tree/master/qsimcirq_tests).
-To build and run all tests, navigate to the test directory and run:
+To build and run all tests, run:
```
-make run-all
+make run-tests
```
This will compile all test binaries to files with `.x` extensions, and run each
-test in series. Testing will stop early if a test fails.
+test in series. Testing will stop early if a test fails. It will also run tests
+of the `qsimcirq` python interface. To run C++ or python tests only, run
+`make run-cxx-tests` or `make run-py-tests`, respectively.
To clean up generated test files, run `make clean` from the test directory.
@@ -125,4 +127,4 @@ An equivalent BibTex format reference is below for all the versions:
doi = {10.5281/zenodo.4023103},
url = {https://doi.org/10.5281/zenodo.4023103}
}
-```
\ No newline at end of file
+```
diff --git a/apps/Makefile b/apps/Makefile
index cb16c1c9..41fb81e5 100644
--- a/apps/Makefile
+++ b/apps/Makefile
@@ -1,8 +1,11 @@
CXX_TARGETS = $(shell find . -maxdepth 1 -name '*.cc')
CXX_TARGETS := $(CXX_TARGETS:%.cc=%.x)
-CUDA_TARGETS = $(shell find . -maxdepth 1 -name '*.cu')
-CUDA_TARGETS := $(CUDA_TARGETS:%.cu=%.x)
+CUDA_TARGETS = $(shell find . -maxdepth 1 -name '*cuda.cu')
+CUDA_TARGETS := $(CUDA_TARGETS:%cuda.cu=%cuda.x)
+
+CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec.cu")
+CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec.cu=%custatevec.x)
.PHONY: qsim
qsim: $(CXX_TARGETS)
@@ -10,12 +13,18 @@ qsim: $(CXX_TARGETS)
.PHONY: qsim-cuda
qsim-cuda: $(CUDA_TARGETS)
+.PHONY: qsim-custatevec
+qsim-custatevec: $(CUSTATEVEC_TARGETS)
+
%.x: %.cc
$(CXX) -o ./$@ $< $(CXXFLAGS) $(ARCHFLAGS)
-%.x: %.cu
+%cuda.x: %cuda.cu
$(NVCC) -o ./$@ $< $(NVCCFLAGS)
+%custatevec.x: %custatevec.cu
+ $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS)
+
.PHONY: clean
clean:
-rm -f ./*.x ./*.a ./*.so ./*.mod
diff --git a/apps/make.sh b/apps/make.sh
index 679694f8..c742e192 100755
--- a/apps/make.sh
+++ b/apps/make.sh
@@ -24,3 +24,8 @@ g++ -O3 -march=native -fopenmp -o qsimh_base.x qsimh_base.cc
g++ -O3 -march=native -fopenmp -o qsimh_amplitudes.x qsimh_amplitudes.cc
nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu
+nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu
+
+# CUQUANTUM_DIR should be set.
+CUSTATEVECFLAGS="-I${CUQUANTUM_DIR}/include -L${CUQUANTUM_DIR}/lib -L${CUQUANTUM_DIR}/lib64 -lcustatevec -lcublas"
+nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu
diff --git a/apps/qsim_amplitudes.cc b/apps/qsim_amplitudes.cc
index 29268633..d37fdd6b 100644
--- a/apps/qsim_amplitudes.cc
+++ b/apps/qsim_amplitudes.cc
@@ -30,11 +30,12 @@
#include "../lib/run_qsim.h"
#include "../lib/simmux.h"
#include "../lib/util.h"
+#include "../lib/util_cpu.h"
constexpr char usage[] = "usage:\n ./qsim_amplitudes -c circuit_file "
"-d times_to_save_results -i input_files "
"-o output_files -s seed -t num_threads "
- "-f max_fused_size -v verbosity\n";
+ "-f max_fused_size -v verbosity -z\n";
struct Options {
std::string circuit_file;
@@ -45,6 +46,7 @@ struct Options {
unsigned num_threads = 1;
unsigned max_fused_size = 2;
unsigned verbosity = 0;
+ bool denormals_are_zeros = false;
};
Options GetOptions(int argc, char* argv[]) {
@@ -56,7 +58,7 @@ Options GetOptions(int argc, char* argv[]) {
return std::atoi(word.c_str());
};
- while ((k = getopt(argc, argv, "c:d:i:s:o:t:f:v:")) != -1) {
+ while ((k = getopt(argc, argv, "c:d:i:s:o:t:f:v:z")) != -1) {
switch (k) {
case 'c':
opt.circuit_file = optarg;
@@ -82,6 +84,9 @@ Options GetOptions(int argc, char* argv[]) {
case 'v':
opt.verbosity = std::atoi(optarg);
break;
+ case 'z':
+ opt.denormals_are_zeros = true;
+ break;
default:
qsim::IO::errorf(usage);
exit(1);
@@ -162,6 +167,10 @@ int main(int argc, char* argv[]) {
return 1;
}
+ if (opt.denormals_are_zeros) {
+ SetFlushToZeroAndDenormalsAreZeros();
+ }
+
struct Factory {
Factory(unsigned num_threads) : num_threads(num_threads) {}
diff --git a/apps/qsim_base.cc b/apps/qsim_base.cc
index 062dfdde..5a7604d5 100644
--- a/apps/qsim_base.cc
+++ b/apps/qsim_base.cc
@@ -26,6 +26,11 @@
#include "../lib/io_file.h"
#include "../lib/run_qsim.h"
#include "../lib/simmux.h"
+#include "../lib/util_cpu.h"
+
+constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime "
+ "-s seed -t threads -f max_fused_size "
+ "-v verbosity -z\n";
struct Options {
std::string circuit_file;
@@ -34,18 +39,15 @@ struct Options {
unsigned num_threads = 1;
unsigned max_fused_size = 2;
unsigned verbosity = 0;
+ bool denormals_are_zeros = false;
};
Options GetOptions(int argc, char* argv[]) {
- constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime "
- "-s seed -t threads -f max_fused_size "
- "-v verbosity\n";
-
Options opt;
int k;
- while ((k = getopt(argc, argv, "c:d:s:t:f:v:")) != -1) {
+ while ((k = getopt(argc, argv, "c:d:s:t:f:v:z")) != -1) {
switch (k) {
case 'c':
opt.circuit_file = optarg;
@@ -65,6 +67,9 @@ Options GetOptions(int argc, char* argv[]) {
case 'v':
opt.verbosity = std::atoi(optarg);
break;
+ case 'z':
+ opt.denormals_are_zeros = true;
+ break;
default:
qsim::IO::errorf(usage);
exit(1);
@@ -77,6 +82,7 @@ Options GetOptions(int argc, char* argv[]) {
bool ValidateOptions(const Options& opt) {
if (opt.circuit_file.empty()) {
qsim::IO::errorf("circuit file is not provided.\n");
+ qsim::IO::errorf(usage);
return false;
}
@@ -114,6 +120,10 @@ int main(int argc, char* argv[]) {
return 1;
}
+ if (opt.denormals_are_zeros) {
+ SetFlushToZeroAndDenormalsAreZeros();
+ }
+
struct Factory {
Factory(unsigned num_threads) : num_threads(num_threads) {}
diff --git a/apps/qsim_base_custatevec.cu b/apps/qsim_base_custatevec.cu
new file mode 100644
index 00000000..a83f3e46
--- /dev/null
+++ b/apps/qsim_base_custatevec.cu
@@ -0,0 +1,171 @@
+// Copyright 2019 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "../lib/circuit_qsim_parser.h"
+#include "../lib/formux.h"
+#include "../lib/fuser_mqubit.h"
+#include "../lib/gates_qsim.h"
+#include "../lib/io_file.h"
+#include "../lib/run_qsim.h"
+#include "../lib/simulator_custatevec.h"
+#include "../lib/util_custatevec.h"
+
+struct Options {
+ std::string circuit_file;
+ unsigned maxtime = std::numeric_limits::max();
+ unsigned seed = 1;
+ unsigned max_fused_size = 2;
+ unsigned verbosity = 0;
+};
+
+Options GetOptions(int argc, char* argv[]) {
+ constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime "
+ "-s seed -f max_fused_size -v verbosity\n";
+
+ Options opt;
+
+ int k;
+
+ while ((k = getopt(argc, argv, "c:d:s:f:v:")) != -1) {
+ switch (k) {
+ case 'c':
+ opt.circuit_file = optarg;
+ break;
+ case 'd':
+ opt.maxtime = std::atoi(optarg);
+ break;
+ case 's':
+ opt.seed = std::atoi(optarg);
+ break;
+ case 'f':
+ opt.max_fused_size = std::atoi(optarg);
+ break;
+ case 'v':
+ opt.verbosity = std::atoi(optarg);
+ break;
+ default:
+ qsim::IO::errorf(usage);
+ exit(1);
+ }
+ }
+
+ return opt;
+}
+
+bool ValidateOptions(const Options& opt) {
+ if (opt.circuit_file.empty()) {
+ qsim::IO::errorf("circuit file is not provided.\n");
+ return false;
+ }
+
+ return true;
+}
+
+template
+void PrintAmplitudes(
+ unsigned num_qubits, const StateSpace& state_space, const State& state) {
+ static constexpr char const* bits[8] = {
+ "000", "001", "010", "011", "100", "101", "110", "111",
+ };
+
+ uint64_t size = std::min(uint64_t{8}, uint64_t{1} << num_qubits);
+ unsigned s = 3 - std::min(unsigned{3}, num_qubits);
+
+ for (uint64_t i = 0; i < size; ++i) {
+ auto a = state_space.GetAmpl(state, i);
+ qsim::IO::messagef("%s:%16.8g%16.8g%16.8g\n",
+ bits[i] + s, std::real(a), std::imag(a), std::norm(a));
+ }
+}
+
+int main(int argc, char* argv[]) {
+ using namespace qsim;
+
+ auto opt = GetOptions(argc, argv);
+ if (!ValidateOptions(opt)) {
+ return 1;
+ }
+
+ using fp_type = float;
+
+ Circuit> circuit;
+ if (!CircuitQsimParser::FromFile(opt.maxtime, opt.circuit_file,
+ circuit)) {
+ return 1;
+ }
+
+ struct Factory {
+ using Simulator = qsim::SimulatorCuStateVec;
+ using StateSpace = Simulator::StateSpace;
+
+ Factory() {
+ ErrorCheck(cublasCreate(&cublas_handle));
+ ErrorCheck(custatevecCreate(&custatevec_handle));
+ }
+
+ ~Factory() {
+ ErrorCheck(cublasDestroy(cublas_handle));
+ ErrorCheck(custatevecDestroy(custatevec_handle));
+ }
+
+ StateSpace CreateStateSpace() const {
+ return StateSpace(cublas_handle, custatevec_handle);
+ }
+
+ Simulator CreateSimulator() const {
+ return Simulator(custatevec_handle);
+ }
+
+ cublasHandle_t cublas_handle;
+ custatevecHandle_t custatevec_handle;
+ };
+
+ using Simulator = Factory::Simulator;
+ using StateSpace = Simulator::StateSpace;
+ using State = StateSpace::State;
+ using Fuser = MultiQubitGateFuser>;
+ using Runner = QSimRunner;
+
+ Factory factory;
+
+ StateSpace state_space = factory.CreateStateSpace();
+ State state = state_space.Create(circuit.num_qubits);
+
+ if (state_space.IsNull(state)) {
+ IO::errorf("not enough memory: is the number of qubits too large?\n");
+ return 1;
+ }
+
+ state_space.SetStateZero(state);
+
+ Runner::Parameter param;
+ param.max_fused_size = opt.max_fused_size;
+ param.seed = opt.seed;
+ param.verbosity = opt.verbosity;
+
+ if (Runner::Run(param, factory, circuit, state)) {
+ PrintAmplitudes(circuit.num_qubits, state_space, state);
+ }
+
+ return 0;
+}
diff --git a/apps/qsim_qtrajectory_cuda.cu b/apps/qsim_qtrajectory_cuda.cu
new file mode 100644
index 00000000..65fe1cd3
--- /dev/null
+++ b/apps/qsim_qtrajectory_cuda.cu
@@ -0,0 +1,334 @@
+// Copyright 2019 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../lib/channels_qsim.h"
+#include "../lib/circuit_qsim_parser.h"
+#include "../lib/expect.h"
+#include "../lib/fuser_mqubit.h"
+#include "../lib/gates_qsim.h"
+#include "../lib/io_file.h"
+#include "../lib/qtrajectory.h"
+#include "../lib/simulator_cuda.h"
+
+struct Options {
+ std::string circuit_file;
+ std::vector times = {std::numeric_limits::max()};
+ double amplitude_damp_const = 0;
+ double phase_damp_const = 0;
+ unsigned traj0 = 0;
+ unsigned num_trajectories = 10;
+ unsigned max_fused_size = 2;
+ unsigned verbosity = 0;
+};
+
+constexpr char usage[] = "usage:\n ./qsim_qtrajectory_cuda.x "
+ "-c circuit_file -d times_to_calculate_observables "
+ "-a amplitude_damping_const -p phase_damping_const "
+ "-t traj0 -n num_trajectories -f max_fused_size "
+ "-v verbosity\n";
+
+Options GetOptions(int argc, char* argv[]) {
+ Options opt;
+
+ int k;
+
+ auto to_int = [](const std::string& word) -> unsigned {
+ return std::atoi(word.c_str());
+ };
+
+ while ((k = getopt(argc, argv, "c:d:a:p:t:n:f:v:")) != -1) {
+ switch (k) {
+ case 'c':
+ opt.circuit_file = optarg;
+ break;
+ case 'd':
+ qsim::SplitString(optarg, ',', to_int, opt.times);
+ break;
+ case 'a':
+ opt.amplitude_damp_const = std::atof(optarg);
+ break;
+ case 'p':
+ opt.phase_damp_const = std::atof(optarg);
+ break;
+ case 't':
+ opt.traj0 = std::atoi(optarg);
+ break;
+ case 'n':
+ opt.num_trajectories = std::atoi(optarg);
+ break;
+ case 'f':
+ opt.max_fused_size = std::atoi(optarg);
+ break;
+ case 'v':
+ opt.verbosity = std::atoi(optarg);
+ break;
+ break;
+ default:
+ qsim::IO::errorf(usage);
+ exit(1);
+ }
+ }
+
+ return opt;
+}
+
+bool ValidateOptions(const Options& opt) {
+ if (opt.circuit_file.empty()) {
+ qsim::IO::errorf("circuit file is not provided.\n");
+ qsim::IO::errorf(usage);
+ return false;
+ }
+
+ if (opt.times.size() == 0) {
+ qsim::IO::errorf("times to calculate observables are not provided.\n");
+ return false;
+ }
+
+ for (std::size_t i = 1; i < opt.times.size(); i++) {
+ if (opt.times[i - 1] == opt.times[i]) {
+ qsim::IO::errorf("duplicate times to calculate observables.\n");
+ return false;
+ } else if (opt.times[i - 1] > opt.times[i]) {
+ qsim::IO::errorf("times to calculate observables are not sorted.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+template
+std::vector> AddNoise(
+ const qsim::Circuit& circuit, const std::vector& times,
+ const Channel1& channel1, const Channel2& channel2) {
+ std::vector> ncircuits;
+ ncircuits.reserve(times.size());
+
+ qsim::NoisyCircuit ncircuit;
+
+ ncircuit.num_qubits = circuit.num_qubits;
+ ncircuit.channels.reserve(5 * circuit.gates.size());
+
+ unsigned cur_time_index = 0;
+
+ for (std::size_t i = 0; i < circuit.gates.size(); ++i) {
+ const auto& gate = circuit.gates[i];
+
+ ncircuit.channels.push_back(qsim::MakeChannelFromGate(3 * gate.time, gate));
+
+ for (auto q : gate.qubits) {
+ ncircuit.channels.push_back(channel1.Create(3 * gate.time + 1, q));
+ }
+
+ for (auto q : gate.qubits) {
+ ncircuit.channels.push_back(channel2.Create(3 * gate.time + 2, q));
+ }
+
+ unsigned t = times[cur_time_index];
+
+ if (i == circuit.gates.size() - 1 || t < circuit.gates[i + 1].time) {
+ ncircuits.push_back(std::move(ncircuit));
+
+ ncircuit = {};
+
+ if (i < circuit.gates.size() - 1) {
+ if (circuit.gates[i + 1].time > times.back()) {
+ break;
+ }
+
+ ncircuit.num_qubits = circuit.num_qubits;
+ ncircuit.channels.reserve(5 * circuit.gates.size());
+ }
+
+ ++cur_time_index;
+ }
+ }
+
+ return ncircuits;
+}
+
+template
+std::vector>> GetObservables(
+ unsigned num_qubits) {
+ std::vector>> observables;
+ observables.reserve(num_qubits);
+
+ using X = qsim::GateX;
+
+ for (unsigned q = 0; q < num_qubits; ++q) {
+ observables.push_back({{{1.0, 0.0}, {X::Create(0, q)}}});
+ }
+
+ return observables;
+}
+
+int main(int argc, char* argv[]) {
+ using namespace qsim;
+
+ using fp_type = float;
+
+ struct Factory {
+ using Simulator = qsim::SimulatorCUDA;
+ using StateSpace = Simulator::StateSpace;
+
+ Factory(const StateSpace::Parameter& param1,
+ const Simulator::Parameter& param2)
+ : param1(param1), param2(param2) {}
+
+ StateSpace CreateStateSpace() const {
+ return StateSpace(param1);
+ }
+
+ Simulator CreateSimulator() const {
+ return Simulator(param2);
+ }
+
+ const StateSpace::Parameter& param1;
+ const Simulator::Parameter& param2;
+ };
+
+ using Simulator = Factory::Simulator;
+ using StateSpace = Simulator::StateSpace;
+ using State = StateSpace::State;
+ using Fuser = MultiQubitGateFuser>;
+ using QTSimulator = QuantumTrajectorySimulator,
+ MultiQubitGateFuser,
+ Simulator>;
+
+ auto opt = GetOptions(argc, argv);
+ if (!ValidateOptions(opt)) {
+ return 1;
+ }
+
+ Circuit> circuit;
+ unsigned maxtime = opt.times.back();
+ if (!CircuitQsimParser::FromFile(maxtime, opt.circuit_file,
+ circuit)) {
+ return 1;
+ }
+
+ if (opt.times.size() == 1
+ && opt.times[0] == std::numeric_limits::max()) {
+ opt.times[0] = circuit.gates.back().time;
+ }
+
+ StateSpace::Parameter param1;
+ Simulator::Parameter param2;
+ Factory factory(param1, param2);
+
+ Simulator simulator = factory.CreateSimulator();
+ StateSpace state_space = factory.CreateStateSpace();
+
+ State state = state_space.Create(circuit.num_qubits);
+
+ if (state_space.IsNull(state)) {
+ IO::errorf("not enough memory: is the number of qubits too large?\n");
+ return 1;
+ }
+
+ typename QTSimulator::Parameter param3;
+ param3.max_fused_size = opt.max_fused_size;
+ param3.verbosity = opt.verbosity;
+ param3.apply_last_deferred_ops = true;
+
+ auto channel1 = AmplitudeDampingChannel(opt.amplitude_damp_const);
+ auto channel2 = PhaseDampingChannel(opt.phase_damp_const);
+
+ auto noisy_circuits = AddNoise(circuit, opt.times, channel1, channel2);
+
+ auto observables = GetObservables>(circuit.num_qubits);
+
+ std::vector>>> results;
+ results.reserve(opt.num_trajectories);
+
+ QTSimulator::Stat stat;
+
+ using CleanResults = std::vector>>;
+ CleanResults primary_results(noisy_circuits.size());
+
+ for (unsigned i = 0; i < opt.num_trajectories; ++i) {
+ results.push_back({});
+ results[i].reserve(noisy_circuits.size());
+
+ state_space.SetStateZero(state);
+
+ auto seed = noisy_circuits.size() * (i + opt.traj0);
+
+ for (unsigned s = 0; s < noisy_circuits.size(); ++s) {
+ if (!QTSimulator::RunOnce(param3, noisy_circuits[s], seed++,
+ state_space, simulator, state, stat)) {
+ return 1;
+ }
+
+ results[i].push_back({});
+ results[i][s].reserve(observables.size());
+
+ primary_results[s].reserve(observables.size());
+
+ if (stat.primary && !primary_results[s].empty()) {
+ for (std::size_t k = 0; k < observables.size(); ++k) {
+ results[i][s].push_back(primary_results[s][k]);
+ }
+ } else {
+ for (const auto& obs : observables) {
+ auto result = ExpectationValue(obs, simulator, state);
+ results[i][s].push_back(result);
+
+ if (stat.primary) {
+ primary_results[s].push_back(result);
+ param3.apply_last_deferred_ops = false;
+ }
+ }
+ }
+ }
+ }
+
+ for (unsigned i = 1; i < opt.num_trajectories; ++i) {
+ for (unsigned s = 0; s < noisy_circuits.size(); ++s) {
+ for (unsigned k = 0; k < observables.size(); ++k) {
+ results[0][s][k] += results[i][s][k];
+ }
+ }
+ }
+
+ double f = 1.0 / opt.num_trajectories;
+
+ for (unsigned s = 0; s < noisy_circuits.size(); ++s) {
+ for (unsigned k = 0; k < observables.size(); ++k) {
+ results[0][s][k] *= f;
+ }
+ }
+
+ for (unsigned s = 0; s < noisy_circuits.size(); ++s) {
+ IO::messagef("#time=%u\n", opt.times[s]);
+
+ for (unsigned k = 0; k < observables.size(); ++k) {
+ IO::messagef("%4u %4u %17.9g %17.9g\n", s, k,
+ std::real(results[0][s][k]), std::imag(results[0][s][k]));
+ }
+ }
+
+ return 0;
+}
diff --git a/apps/qsim_von_neumann.cc b/apps/qsim_von_neumann.cc
index df9b32fb..46576083 100644
--- a/apps/qsim_von_neumann.cc
+++ b/apps/qsim_von_neumann.cc
@@ -28,6 +28,11 @@
#include "../lib/io_file.h"
#include "../lib/run_qsim.h"
#include "../lib/simmux.h"
+#include "../lib/util_cpu.h"
+
+constexpr char usage[] = "usage:\n ./qsim_von_neumann -c circuit -d maxtime "
+ "-s seed -t threads -f max_fused_size "
+ "-v verbosity -z\n";
struct Options {
std::string circuit_file;
@@ -36,18 +41,15 @@ struct Options {
unsigned num_threads = 1;
unsigned max_fused_size = 2;
unsigned verbosity = 0;
+ bool denormals_are_zeros = false;
};
Options GetOptions(int argc, char* argv[]) {
- constexpr char usage[] = "usage:\n ./qsim_von_neumann -c circuit -d maxtime "
- "-s seed -t threads -f max_fused_size "
- "-v verbosity\n";
-
Options opt;
int k;
- while ((k = getopt(argc, argv, "c:d:s:t:f:v:")) != -1) {
+ while ((k = getopt(argc, argv, "c:d:s:t:f:v:z")) != -1) {
switch (k) {
case 'c':
opt.circuit_file = optarg;
@@ -67,6 +69,9 @@ Options GetOptions(int argc, char* argv[]) {
case 'v':
opt.verbosity = std::atoi(optarg);
break;
+ case 'z':
+ opt.denormals_are_zeros = true;
+ break;
default:
qsim::IO::errorf(usage);
exit(1);
@@ -79,6 +84,7 @@ Options GetOptions(int argc, char* argv[]) {
bool ValidateOptions(const Options& opt) {
if (opt.circuit_file.empty()) {
qsim::IO::errorf("circuit file is not provided.\n");
+ qsim::IO::errorf(usage);
return false;
}
@@ -99,6 +105,10 @@ int main(int argc, char* argv[]) {
return 1;
}
+ if (opt.denormals_are_zeros) {
+ SetFlushToZeroAndDenormalsAreZeros();
+ }
+
struct Factory {
Factory(unsigned num_threads) : num_threads(num_threads) {}
diff --git a/apps/qsimh_amplitudes.cc b/apps/qsimh_amplitudes.cc
index cf57f121..7cb1b085 100644
--- a/apps/qsimh_amplitudes.cc
+++ b/apps/qsimh_amplitudes.cc
@@ -30,12 +30,13 @@
#include "../lib/run_qsimh.h"
#include "../lib/simmux.h"
#include "../lib/util.h"
+#include "../lib/util_cpu.h"
constexpr char usage[] = "usage:\n ./qsimh_amplitudes -c circuit_file "
"-d maxtime -k part1_qubits "
"-w prefix -p num_prefix_gates -r num_root_gates "
"-i input_file -o output_file -t num_threads "
- "-v verbosity\n";
+ "-v verbosity -z\n";
struct Options {
std::string circuit_file;
@@ -48,6 +49,7 @@ struct Options {
unsigned num_root_gatexs = 0;
unsigned num_threads = 1;
unsigned verbosity = 0;
+ bool denormals_are_zeros = false;
};
Options GetOptions(int argc, char* argv[]) {
@@ -59,7 +61,7 @@ Options GetOptions(int argc, char* argv[]) {
return std::atoi(word.c_str());
};
- while ((k = getopt(argc, argv, "c:d:k:w:p:r:i:o:t:v:")) != -1) {
+ while ((k = getopt(argc, argv, "c:d:k:w:p:r:i:o:t:v:z")) != -1) {
switch (k) {
case 'c':
opt.circuit_file = optarg;
@@ -91,6 +93,9 @@ Options GetOptions(int argc, char* argv[]) {
case 'v':
opt.verbosity = std::atoi(optarg);
break;
+ case 'z':
+ opt.denormals_are_zeros = true;
+ break;
default:
qsim::IO::errorf(usage);
exit(1);
@@ -180,6 +185,10 @@ int main(int argc, char* argv[]) {
}
auto parts = GetParts(circuit.num_qubits, opt.part1);
+ if (opt.denormals_are_zeros) {
+ SetFlushToZeroAndDenormalsAreZeros();
+ }
+
std::vector bitstrings;
auto num_qubits = circuit.num_qubits;
if (!BitstringsFromFile(num_qubits, opt.input_file, bitstrings)) {
diff --git a/apps/qsimh_base.cc b/apps/qsimh_base.cc
index 7b9190d8..eb0c9c6c 100644
--- a/apps/qsimh_base.cc
+++ b/apps/qsimh_base.cc
@@ -30,11 +30,12 @@
#include "../lib/run_qsimh.h"
#include "../lib/simmux.h"
#include "../lib/util.h"
+#include "../lib/util_cpu.h"
constexpr char usage[] = "usage:\n ./qsimh_base -c circuit_file "
"-d maximum_time -k part1_qubits "
"-w prefix -p num_prefix_gates -r num_root_gates "
- "-t num_threads -v verbosity\n";
+ "-t num_threads -v verbosity -z\n";
struct Options {
std::string circuit_file;
@@ -45,6 +46,7 @@ struct Options {
unsigned num_root_gatexs = 0;
unsigned num_threads = 1;
unsigned verbosity = 0;
+ bool denormals_are_zeros = false;
};
Options GetOptions(int argc, char* argv[]) {
@@ -56,7 +58,7 @@ Options GetOptions(int argc, char* argv[]) {
return std::atoi(word.c_str());
};
- while ((k = getopt(argc, argv, "c:d:k:w:p:r:t:v:")) != -1) {
+ while ((k = getopt(argc, argv, "c:d:k:w:p:r:t:v:z")) != -1) {
switch (k) {
case 'c':
opt.circuit_file = optarg;
@@ -82,6 +84,9 @@ Options GetOptions(int argc, char* argv[]) {
case 'v':
opt.verbosity = std::atoi(optarg);
break;
+ case 'z':
+ opt.denormals_are_zeros = true;
+ break;
default:
qsim::IO::errorf(usage);
exit(1);
@@ -142,6 +147,10 @@ int main(int argc, char* argv[]) {
}
auto parts = GetParts(circuit.num_qubits, opt.part1);
+ if (opt.denormals_are_zeros) {
+ SetFlushToZeroAndDenormalsAreZeros();
+ }
+
uint64_t num_bitstrings =
std::min(uint64_t{8}, uint64_t{1} << circuit.num_qubits);
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 00000000..2d2638bc
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1,3 @@
+black~=22.3.0
+flynt~=0.60
+pytest
diff --git a/docs/_book.yaml b/docs/_book.yaml
index 3f344eb1..2fe9a2c8 100644
--- a/docs/_book.yaml
+++ b/docs/_book.yaml
@@ -6,51 +6,62 @@ upper_tabs:
menu:
- include: /_book/menu_software.yaml
lower_tabs:
- # Subsite tabs
- other:
- - name: "Guide & Tutorials"
- contents:
- - title: "qsim and qsimh"
- path: /qsim/overview
- - title: "Usage"
- path: /qsim/usage
- - title: "Installing qsimcirq"
- path: /qsim/install_qsimcirq
- - title: "Cirq interface"
- path: /qsim/cirq_interface
- - title: "Input circuit file format"
- path: /qsim/input_format
- - title: "Template naming"
- path: /qsim/type_reference
- - title: "Build with Bazel"
- path: /qsim/bazel
- - title: "Testing qsim"
- path: /qsim/testing
- - title: "Docker"
- path: /qsim/docker
- - title: "Release process"
- path: /qsim/release
+ # Subsite tabs
+ other:
+ - name: "Tutorials"
+ contents:
+ - title: "Get started with qsimcirq"
+ path: /qsim/tutorials/qsimcirq
+ - heading: "qsim on Google Cloud"
+ - title: "Before you begin"
+ path: /qsim/tutorials/gcp_before_you_begin
+ - title: "CPU-based simulation"
+ path: /qsim/tutorials/gcp_cpu
+ - title: "GPU-based simulation"
+ path: /qsim/tutorials/gcp_gpu
+ - title: "Multinode simulation"
+ path: /qsim/tutorials/multinode
+ - heading: "Other tutorials"
+ - title: "Simulate a large circuit"
+ path: /qsim/tutorials/q32d14
+ - title: "Simulate noise"
+ path: /qsim/tutorials/noisy_qsimcirq
- - heading: "Tutorials"
- - title: "Get started with qsimcirq"
- path: /qsim/tutorials/qsimcirq
- - title: "Quantum simulation on GCP with Cirq and qsim"
- path: /qsim/tutorials/qsimcirq_gcp
- - title: "Simulate a large quantum circuit"
- path: /qsim/tutorials/q32d14
+ - name: "Guides"
+ contents:
+ - title: "Introduction"
+ path: /qsim/overview
+ - title: "Choosing hardware"
+ path: /qsim/choose_hw
+ - title: "Command line reference"
+ path: /qsim/usage
+ - title: "Python interface"
+ path: /qsim/cirq_interface
+ - title: "C++ templates"
+ path: /qsim/type_reference
+ - title: "C++ builds with Bazel"
+ path: /qsim/bazel
+ - title: "Docker builds"
+ path: /qsim/docker
+ - title: "Testing qsim"
+ path: /qsim/testing
+ - title: "Release process"
+ path: /qsim/release
- - name: "Python Reference"
- skip_translation: true
- contents:
- - title: "All Symbols"
- path: /reference/python/qsimcirq/all_symbols
- - include: /reference/python/qsimcirq/_toc.yaml
+ - name: "Python Reference"
+ skip_translation: true
+ contents:
+ - title: "All Symbols"
+ path: /reference/python/qsimcirq/all_symbols
+ - include: /reference/python/qsimcirq/_toc.yaml
- - name: "C++ Reference"
- skip_translation: true
- contents:
- - title: "All Symbols"
- path: /reference/cc/qsim
- - include: /reference/cc/qsim/_doxygen.yaml
+ - name: "C++ Reference"
+ skip_translation: true
+ contents:
+ - title: "All Symbols"
+ path: /reference/cc/qsim
+ - include: /reference/cc/qsim/_doxygen.yaml
- include: /_book/upper_tabs_right.yaml
+
+
diff --git a/docs/_index.yaml b/docs/_index.yaml
index 0b93d76c..e525f69c 100644
--- a/docs/_index.yaml
+++ b/docs/_index.yaml
@@ -15,7 +15,7 @@
book_path: /qsim/_book.yaml
project_path: /qsim/_project.yaml
description: >
- Quantum circuit simulators qsim and qsimh.
+ Quantum circuit simulator qsim.
landing_page:
custom_css_path: /site-assets/css/style.css
rows:
@@ -24,7 +24,7 @@ landing_page:
- hero
- description-50
- padding-large
- heading: qsim and qsimh
+ heading: qsim
icon:
path: /site-assets/images/icons/icon_qsim.png
description: >
@@ -39,12 +39,6 @@ landing_page:
qsim is integrated with Cirq and can be used to run simulations of up
to 40 qubits on a 90 core Intel Xeon workstation.
-
qsimh
-
- qsimh is a hybrid Schrödinger-Feynman simulator built for parallel
- execution on a cluster of machines. It produces amplitudes for user-
- specified output bitstrings.
-
buttons:
- label: Get started with qsim on Cirq
path: /qsim/tutorials/qsimcirq
@@ -64,9 +58,9 @@ landing_page:
# Define a circuit to run
# (Example is from the 2019 "Quantum Supremacy" experiement)
- circuit = cirq.experiments.
+ circuit = (cirq.experiments.
random_rotations_between_grid_interaction_layers_circuit(
- qubits=qubits, depth=16)
+ qubits=qubits, depth=16))
# Measure qubits at the end of the circuit
circuit.append(cirq.measure(*qubits, key='all_qubits'))
@@ -84,28 +78,11 @@ landing_page:
options:
- cards
items:
- - heading: "Schrödinger simulation via qsim"
- image_path: /site-assets/images/cards/qsim-card-schrodinger.png
- description: >
- qsim is a full wavefunction simulator that has been optimized to support
- vectorized operations and multi-threading.
- buttons:
- - label: "Learn more"
- path: /qsim/usage
- - heading: "Schrödinger-Feynman simulation via qsimh"
- image_path: /site-assets/images/cards/qsim-card-schrodinger-feynman.png
- description: >
- qsimh is a hybrid Schrödinger-Feynman simulator. It simulates separate
- disjoint sets of qubit using a full wave vector simulator, and then uses
- Feynman paths to sum over gates that span the sets.
- buttons:
- - label: "Learn more"
- path: /qsim/usage#qsimh_base_usage
- heading: "Cirq integration"
image_path: /site-assets/images/cards/qsim-card-cirq-integrations.png
description: >
Cirq is a python framework for writing, simulating, and executing
- quantum programs. Cirq’s built in simulator is useful to around 20
+ quantum programs. Cirq's built in simulator is useful to around 20
qubits. By using the qsim Cirq simulator one can boost the number of
qubits simulated to be mostly limited by available ram. Up to 40 qubits
can be simulated on a 90 core Intel Xeon workstation.
@@ -115,9 +92,27 @@ landing_page:
- heading: "Install qsim on GCP"
image_path: /site-assets/images/cards/qsim-card-gcp.jpg
description: >
- Learn how to simulate up to 38 qubits on Google Cloud’s Compute Engine.
+ Learn how to simulate up to 38 qubits on Google Cloud's Compute Engine.
qsim has a prepackaged docker image that allows easy deployment of qsim,
Juypter, and Cirq onto a virtual machine.
buttons:
- label: "Learn more"
- path: /qsim/tutorials/qsimcirq_gcp
+ path: /qsim/tutorials/gcp_before_you_begin
+ - heading: "Upgrades to qsim"
+ image_path: /site-assets/images/cards/qsim-card-schrodinger.png
+ description: >
+ To help researchers and developers develop quantum algorithms today, we
+ have made updates to qsim that make it more performant and intuitive,
+ and more "hardware-like".
+ buttons:
+ - label: "Learn more"
+ path: https://opensource.googleblog.com/2021/11/Upgrading%20qsim%20Google%20Quantum%20AIs%20Open%20Source%20Quantum%20Simulator%20.html?linkId=138925083
+ - heading: "Integrating qsim with NVIDIA's cuQuantum SDK"
+ image_path: /qsim/images/qsim_nvidia.png
+ description: >
+ The integration between qsim and the NVIDIA cuQuantum SDK will enable
+ qsim users to make the most of GPUs when developing quantum algorithms
+ and applications.
+ buttons:
+ - label: "Learn more"
+ path: https://opensource.googleblog.com/2021/11/qsim%20integrates%20with%20NVIDIA%20cuQuantum%20SDK%20to%20accelerate%20quantum%20circuit%20simulations%20on%20NVIDIA%20GPUs.html
diff --git a/docs/choose_hw.md b/docs/choose_hw.md
new file mode 100644
index 00000000..3be2bc06
--- /dev/null
+++ b/docs/choose_hw.md
@@ -0,0 +1,934 @@
+
+
+# Choosing hardware for your qsim simulation
+
+As you increase the size and complexity of your quantum simulation, you rapidly
+require a large increase in computational power. This guide describes
+considerations that can help you choose hardware for your simulation.
+
+Your simulation setup depends on the following:
+
+* Noise; noisy (realistic) simulations require more compute power than
+ noiseless (idealised) simulations.
+* Number of qubits.
+* Circuit depth; the number of time steps required to perform the circuit.
+
+## Quick start
+
+The following graph provides loose guidelines to help you get started with
+choosing hardware for your simulation. The qubit upper bounds in this chart are
+not technical limits.
+
+![Decision tree for hardware to run a qsim simulation.](images/choose_hw.png)
+
+## Choose hardware for your simulation
+
+### 1. Evaluate whether your simulation can be run locally
+
+If you have a modern laptop with at least 8GB of memory, you can run your
+simulation locally in the following cases:
+
+* Noiseless simulations that use fewer than 29 qubits.
+* Noisy simulations that use fewer than 18 qubits.
+
+If you intend to simulate a circuit many times, consider multinode simulation.
+For more information about multinode simulation [see step 5,
+below](#5_consider_multiple_compute_nodes).
+
+### 2. Estimate your memory requirements
+
+You can estimate your memory requirements with the following rule of thumb:
+$ memory\ required = 8 \cdot 2^N bytes $ for an N-qubit circuit
+
+In addition to memory size, consider the bandwidth of your memory. qsim performs
+best when it can use the maximum number of threads. Multi-threaded simulation
+benefits from high-bandwidth memory (above 100GB/s).
+
+### 3. Decide between CPUs and GPUs
+
+* GPU hardware starts to outperform CPU hardware significantly (up to 15x
+ faster) for circuits with more than 20 qubits.
+* The maximum number of qubits that you can simulate with a GPU is limited by
+ the memory of the GPU. Currently, for a noiseless simulation on an NVIDIA
+ A100 GPU (with 40GB of memory), the maximum number of qubits is 32.
+* For noiseless simulations with 32-40 qubits, you can use CPUs. However, the
+ runtime time increases exponentially with the number of qubits, and runtimes
+ are long for simulations above 32 qubits.
+
+The following charts show the runtime for a random circuit run on
+[Google Compute Engine](https://cloud.google.com/compute), using an NVidia A100
+GPU, and a compute-optimized CPU (c2-standard-4). The first chart shows the
+runtimes for the noiseless simulation. The second chart shows the runtimes for a
+noisy simulation, using a phase damping channel (p=0.01). The charts use a log
+scale.
+
+![qsim runtime comparison on multipe processors: noiseless](images/qsim_runtime_comparison_noiseless.png)
+![qsim runtime comparison on multipe processors: noisy](images/qsim_runtime_comparison_noisy.png)
+
+### 4. Select a specific machine
+
+After you decide whether you want to use CPUs or GPUs for your simulation,
+choose a specific machine:
+
+1. Restrict your options to machines that meet your memory requirements. For
+ more information about memory requirements, see step 2.
+2. Decide if performance (speed) or cost is more important to you:
+ * For a table of performance benchmarks, see
+ [Sample benchmarks](#sample_benchmarks) below.
+ * For more information about GCP pricing, see the
+ [Google Cloud pricing calculator](https://cloud.google.com/products/calculator).
+ * Prioritizing performance is particularly important in the following
+ scenarios:
+ * Simulating with a **higher f value** (f is the maximum number of
+ qubits allowed per fused gate).
+ * For small to medium size circuits (up to 22 qubits), keep f low
+ (2 or 3).
+ * For medium to large size qubits (22+ qubits), use a higher f
+ typically, f=4 is the best option).
+ * Simulating a **deep circuit** (depth 30+).
+
+### 5. Consider multiple compute nodes
+
+Simulating in multinode mode is useful when your simulation can be parallelized.
+In a noisy simulation, the trajectories (also known as repetitions, iterations)
+are “embarrassingly parallelizable”, there is an automated workflow for
+distributing these trajectories over multiple nodes. A simulation of many
+noiseless circuits can also be distributed over multiple compute nodes.
+
+For mor information about running a mulitnode simulation, see [Multinode quantum
+simulation using HTCondor on Google Cloud](/qsim/tutorials/multinode).
+
+## Runtime estimates
+
+Runtime grows exponentially with the number of qubits, and linearly with circuit
+depth beyond 20 qubits.
+
+* For noiseless simulations, runtime grows at a rate of $ 2^N $ for an N-qubit
+ circuit. For more information about runtimes for small circuits, see
+ [Additional notes for advanced users](#additional_notes_for_advanced_users)
+ below).
+* For noisy simulations, runtime grows at a rate of $ 2^N $ multiplied by the
+ number of iterations for an N-qubit circuit.
+
+## Additional notes for advanced users
+
+* The impact of noise on simulation depends on:
+ * What type of errors are included in your noise channel (decoherence,
+ depolarizing channels, coherent errors, readout errors).
+ * How you can represent your noise model using Kraus operator formalism:
+ * Performance is best in the case where all Kraus operators are
+ proportional to unitary matrices, such as when using only a
+ depolarizing channel.
+ * Using noise which cannot be represented with Kraus operators
+ proportional to unitary matrices, can slow down simulations by a
+ factor of up to 6** **compared to using a depolarizing channel only
+ * Noisy simulations are faster with lower noise (when one Kraus
+ operator dominates).
+* Experimenting with the 'f' parameter (maximum number of qubits allowed per
+ fused gate):
+ * The advanced user is advised to try out multiple f values to optimize
+ their simulation setup.
+ * Note that f=2 or f=3 can be optimal for large circuits simulated on
+ CPUs with a smaller number of threads (say, up to four or eight
+ threads). However, this depends on the circuit structure.
+ * Note that f=6 is very rarely optimal.
+* Using the optimal number of threads:
+ * Use the maximum number of threads on CPUs for the best performance.
+ * If the maximum number of threads is not used on multi-socket machines
+ then it is advisable to distribute threads evenly to all sockets or to
+ run all threads within a single socket. Separate simulations on each
+ socket can be run simultaneously in the latter case.
+ * Note that currently the number of CPU threads does not affect the
+ performance for small circuits (smaller than 17 qubits). Only one thread
+ is used because of OpenMP overhead.
+* Runtime estimates for small circuits:
+ * For circuits that contain fewer than 20 qubits, the qsimcirq translation
+ layer performance overhead tends to dominate the runtime estimate. In
+ addition to this, qsim is not optimized for small circuits.
+ * The total small circuits runtime overhead for an N qubit circuit
+ depends on the circuit depth and on N. The overhead can be large enough to
+ conceal the $ 2^N $ growth in runtime.
+
+## Sample benchmarks
+
+**Noiseless simulation benchmarks data sheet**
+
+For a random circuit, depth=20, f=3, max threads.
+
+
+
+
processor type
+
+
machine
+
+
# of qubits
+
+
runtime
+
+
+
+
CPU
+
+
c2-standard-60
+
+
34
+
+
291.987
+
+
+
+
CPU
+
+
c2-standard-60
+
+
32
+
+
54.558
+
+
+
+
CPU
+
+
c2-standard-60
+
+
30
+
+
13.455
+
+
+
+
CPU
+
+
c2-standard-60
+
+
28
+
+
2.837
+
+
+
+
CPU
+
+
c2-standard-60
+
+
24
+
+
0.123
+
+
+
+
CPU
+
+
c2-standard-60
+
+
20
+
+
0.013
+
+
+
+
CPU
+
+
c2-standard-60
+
+
16
+
+
0.009
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
30
+
+
52.880
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
28
+
+
12.814
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
24
+
+
0.658
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
20
+
+
0.031
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
16
+
+
0.008
+
+
+
+
GPU
+
+
a100
+
+
32
+
+
7.415
+
+
+
+
GPU
+
+
a100
+
+
30
+
+
1.561
+
+
+
+
GPU
+
+
a100
+
+
28
+
+
0.384
+
+
+
+
GPU
+
+
a100
+
+
24
+
+
0.030
+
+
+
+
GPU
+
+
a100
+
+
20
+
+
0.010
+
+
+
+
GPU
+
+
a100
+
+
16
+
+
0.007
+
+
+
+
GPU
+
+
t4
+
+
30
+
+
10.163
+
+
+
+
GPU
+
+
t4
+
+
28
+
+
2.394
+
+
+
+
GPU
+
+
t4
+
+
24
+
+
0.118
+
+
+
+
GPU
+
+
t4
+
+
20
+
+
0.014
+
+
+
+
GPU
+
+
t4
+
+
16
+
+
0.007
+
+
+
+
+**Noisy simulation benchmarks data sheet**
+
+For one trajectory of a random circuit, depth=20, f=3, max threads.
+
+
+
+
processor type
+
+
machine
+
+
noise type
+
+
# of qubits
+
+
runtime
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
30
+
+
13.021
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
28
+
+
2.840
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
26
+
+
0.604
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
24
+
+
0.110
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
20
+
+
0.009
+
+
+
+
CPU
+
+
c2-standard-60
+
+
depolarizing
+
+
16
+
+
0.006
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
30
+
+
122.788
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
28
+
+
29.966
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
26
+
+
6.378
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
24
+
+
1.181
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
20
+
+
0.045
+
+
+
+
CPU
+
+
c2-standard-60
+
+
dephasing
+
+
16
+
+
0.023
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
depolarizing
+
+
26
+
+
2.807
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
depolarizing
+
+
24
+
+
0.631
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
depolarizing
+
+
20
+
+
0.027
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
depolarizing
+
+
16
+
+
0.005
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
dephasing
+
+
26
+
+
33.038
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
dephasing
+
+
24
+
+
7.432
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
dephasing
+
+
20
+
+
0.230
+
+
+
+
CPU
+
+
c2-standard-4-4
+
+
dephasing
+
+
16
+
+
0.014
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
30
+
+
1.568
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
28
+
+
0.391
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
26
+
+
0.094
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
24
+
+
0.026
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
20
+
+
0.006
+
+
+
+
GPU
+
+
a100
+
+
depolarizing
+
+
16
+
+
0.004
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
30
+
+
17.032
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
28
+
+
3.959
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
26
+
+
0.896
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
24
+
+
0.236
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
20
+
+
0.029
+
+
+
+
GPU
+
+
a100
+
+
dephasing
+
+
16
+
+
0.021
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
30
+
+
10.229
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
28
+
+
2.444
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
26
+
+
0.519
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
24
+
+
0.115
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
20
+
+
0.009
+
+
+
+
GPU
+
+
t4
+
+
depolarizing
+
+
16
+
+
0.004
+
+
+
+
GPU
+
+
t4
+
+
dephasing
+
+
28
+
+
21.800
+
+
+
+
GPU
+
+
t4
+
+
dephasing
+
+
26
+
+
5.056
+
+
+
+
GPU
+
+
t4
+
+
dephasing
+
+
24
+
+
1.164
+
+
+
+
GPU
+
+
t4
+
+
dephasing
+
+
20
+
+
0.077
+
+
+
+
GPU
+
+
t4
+
+
dephasing
+
+
16
+
+
0.017
+
+
+
diff --git a/docs/cirq_interface.md b/docs/cirq_interface.md
index 20291fe8..24149708 100644
--- a/docs/cirq_interface.md
+++ b/docs/cirq_interface.md
@@ -50,7 +50,7 @@ which invokes qsim through the qsim-Cirq interface.
## Interface design and operations
The purpose of this interface is to provide a performant simulator for quantum
-circuits defined in Cirq.
+circuits defined in Cirq.
### Classes
@@ -75,12 +75,17 @@ This circuit can then be simulated using either `QSimSimulator` or
`QSimSimulator` uses a Schrödinger full state-vector simulator, suitable for
acquiring the complete state of a reasonably-sized circuit (~25 qubits on an
average PC, or up to 40 qubits on high-performance VMs).
+
Options for the simulator, including number of threads and verbosity, can be
-set with the `qsim_options` field using the `qsim_base` flag format defined in
-the [usage docs](./usage.md).
+set with the `qsim_options` field, which accepts a `QSimOptions` object as
+defined in
+[qsim_simulator.py](https://github.com/quantumlib/qsim/blob/master/qsimcirq/qsim_simulator.py).
+These options can also be passed as a {str: val} dict, using the format
+described by that class.
```
-qsim_options = {'t': 8, 'v': 0}
+# equivalent to {'t': 8, 'v': 0}
+qsim_options = qsimcirq.QSimOptions(cpu_threads=8, verbosity=0)
my_sim = qsimcirq.QSimSimulator(qsim_options)
myres = my_sim.simulate(program=my_circuit)
```
@@ -112,6 +117,18 @@ the circuit once for each repetition unless all measurements are terminal. This
ensures that nondeterminism from intermediate measurements is properly
reflected in the results.
+In rare cases when the state vector and gate matrices have many zero entries
+(denormal numbers), a significant performance slowdown can occur. Set
+the `denormals_are_zeros` option to `True` to prevent this issue potentially
+at the cost of a tiny precision loss:
+
+```
+# equivalent to {'t': 8, 'v': 0, 'z': True}
+qsim_options = qsimcirq.QSimOptions(cpu_threads=8, verbosity=0, denormals_are_zeros=True)
+my_sim = qsimcirq.QSimSimulator(qsim_options)
+myres = my_sim.simulate(program=my_circuit)
+```
+
#### QSimhSimulator
`QSimhSimulator` uses a hybrid Schrödinger-Feynman simulator. This limits it to
@@ -136,8 +153,8 @@ outlined in the [usage docs](./usage.md).
## Additional features
-The qsim-Cirq interface provides basic support for gate decomposition and
-circuit parameterization.
+The qsim-Cirq interface supports arbitrary gates and circuit parameterization.
+Additionally, GPU execution of circuits can be requested if GPUs are available.
### Gate decompositions
@@ -148,7 +165,36 @@ matrices, if one is specified.
### Parametrized circuits
-QSimCircuit objects can also contain
+`QSimCircuit` objects can also contain
[parameterized gates](https://cirq.readthedocs.io/en/stable/docs/tutorials/basics.html#Using-parameter-sweeps)
which have values assigned by Cirq's `ParamResolver`. See the link above for
details on how to use this feature.
+
+### GPU execution
+
+`QSimSimulator` provides optional support for GPU execution of circuits, which
+may improve performance. In order to use this feature, qsim must be compiled on
+a device with the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads)
+and run on a device with available NVIDIA GPUs.
+
+Compilation for GPU follows the same steps outlined in the
+[Compiling qsimcirq](./cirq_interface.md#compiling-qsimcirq) section.
+To compile with the NVIDIA cuStateVec library (v1.0.0 or higher is required),
+set the environmment variable `CUQUANTUM_DIR` to the path to the cuStateVec
+library.
+
+`QSimOptions` provides five parameters to configure GPU execution. `use_gpu`
+is required to enable GPU execution:
+* `use_gpu`: if True, use GPU instead of CPU for simulation.
+* `gpu_mode`: use CUDA if set to 0 (default value) or use the NVIDIA cuStateVec
+library if set to any other value.
+
+If `use_gpu` is set and `gpu_mode` is set to 0, the remaining parameters can
+optionally be set to fine-tune perfomance for a specific device or circuit.
+In most cases, the default values provide good performance.
+* `gpu_sim_threads`: number of threads per CUDA block to use for the GPU
+Simulator. This must be a power of 2 in the range [32, 256].
+* `gpu_state_threads`: number of threads per CUDA block to use for the GPU
+StateSpace. This must be a power of 2 in the range [32, 1024].
+* `gpu_data_blocks`: number of data blocks to use on GPU. Below 16 data blocks,
+performance is noticeably reduced.
diff --git a/docs/images/choose_hw.png b/docs/images/choose_hw.png
new file mode 100644
index 00000000..76a894b4
Binary files /dev/null and b/docs/images/choose_hw.png differ
diff --git a/docs/images/colab_connect.png b/docs/images/colab_connect.png
new file mode 100644
index 00000000..1a52c18f
Binary files /dev/null and b/docs/images/colab_connect.png differ
diff --git a/docs/images/colab_connected.png b/docs/images/colab_connected.png
new file mode 100644
index 00000000..779a85ab
Binary files /dev/null and b/docs/images/colab_connected.png differ
diff --git a/docs/images/colab_remote.png b/docs/images/colab_remote.png
new file mode 100644
index 00000000..c1614895
Binary files /dev/null and b/docs/images/colab_remote.png differ
diff --git a/docs/images/qsim_runtime_comparison_noiseless.png b/docs/images/qsim_runtime_comparison_noiseless.png
new file mode 100644
index 00000000..5849813c
Binary files /dev/null and b/docs/images/qsim_runtime_comparison_noiseless.png differ
diff --git a/docs/images/qsim_runtime_comparison_noisy.png b/docs/images/qsim_runtime_comparison_noisy.png
new file mode 100644
index 00000000..ae736592
Binary files /dev/null and b/docs/images/qsim_runtime_comparison_noisy.png differ
diff --git a/docs/images/qsimcirq_gcp/connection.png b/docs/images/qsimcirq_gcp/connection.png
new file mode 100644
index 00000000..ca66bc8a
Binary files /dev/null and b/docs/images/qsimcirq_gcp/connection.png differ
diff --git a/docs/images/qsimcirq_gcp/container.png b/docs/images/qsimcirq_gcp/container.png
new file mode 100644
index 00000000..61ef9c07
Binary files /dev/null and b/docs/images/qsimcirq_gcp/container.png differ
diff --git a/docs/install_qsimcirq.md b/docs/install_qsimcirq.md
index 9fe69d4e..0a0f3182 100644
--- a/docs/install_qsimcirq.md
+++ b/docs/install_qsimcirq.md
@@ -1,49 +1,58 @@
# Installing qsimcirq
-The qsim-Cirq Python interface is available as a PyPI package for Linux users.
-For all other users, Dockerfiles are provided to install qsim in a contained
+The qsim-Cirq Python interface is available as a PyPI package for Linux, MacOS and Windows users.
+For all others, Dockerfiles are provided to install qsim in a contained
environment.
**Note:** The core qsim library (under
[lib/](https://github.com/quantumlib/qsim/blob/master/lib)) can be included
directly in C++ code without installing this interface.
-## Linux installation
+## Before installation
Prior to installation, consider opening a
[virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
-The qsim-Cirq interface uses [CMake](https://cmake.org/) to ensure stable
-compilation of its C++ libraries across a variety of Linux distributions.
-CMake can be installed from their website, or with the command
-`apt-get install cmake`.
-
-Other prerequisites (including pybind11 and pytest) are included in the
+Prerequisites are included in the
[`requirements.txt`](https://github.com/quantumlib/qsim/blob/master/requirements.txt)
file, and will be automatically installed along with qsimcirq.
-To install the qsim-Cirq interface on Linux, simply run `pip3 install qsimcirq`.
-For examples of how to use this package, see the tests in
-[qsim/qsimcirq_tests/](https://github.com/quantumlib/qsim/blob/master/qsimcirq_tests/).
+If you'd like to develop qsimcirq, a separate set of dependencies are includes
+in the
+[`dev-requirements.txt`](https://github.com/quantumlib/qsim/blob/master/dev-requirements.txt)
+file. You can install them with `pip3 install -r dev-requirements.txt` or
+`pip3 install qsimcirq[dev]`.
+
+## Linux installation
+
+We provide `qsimcirq` Python wheels on 64-bit `x86` architectures with `Python 3.{6,7,8,9}`.
+
+Simply run `pip3 install qsimcirq`.
-## MacOS and Windows installation
+## MacOS installation
-For users interested in running qsim on a MacOS or Windows device, we strongly
-recommend using the [Docker config](./docker.md) provided with this
-repository.
+We provide `qsimcirq` Python wheels on `x86` architectures with `Python 3.{6,7,8,9}`.
-### Experimental install process
+Simply run `pip3 install qsimcirq`.
-Alternatively, MacOS and Windows users can follow the Linux install process,
-but it is currently untested on those platforms. Users are encouraged to report
-any issues seen with this process.
+## Windows installation
+
+We provide `qsimcirq` Python wheels on 64-bit `x86` and `amd64` architectures with `Python 3.{6,7,8,9}`.
+
+Simply run `pip3 install qsimcirq`.
+
+## There's no compatible wheel for my machine!
+
+If existing wheels do no meet your needs please open an issue with your machine configuration (i.e. CPU architecture, Python version) and consider using the [Docker config](./docker.md) provided with this repository.
## Testing
-After installing qsimcirq on your machine, you can test the installation by
+After installing `qsimcirq` on your machine, you can test the installation by
copying [qsimcirq_tests/qsimcirq_test.py](qsimcirq_tests/qsimcirq_test.py)
to your machine and running `python3 -m pytest qsimcirq_test.py`.
+It also has examples of how how to use this package.
+
**Note:** Because of how Python searches for modules, the test file cannot
be run from inside a clone of the qsim repository, or from any parent
directory of such a repository. Failure to meet this criteria may result
diff --git a/docs/tutorials/gcp_before_you_begin.md b/docs/tutorials/gcp_before_you_begin.md
new file mode 100644
index 00000000..13296dfe
--- /dev/null
+++ b/docs/tutorials/gcp_before_you_begin.md
@@ -0,0 +1,34 @@
+# Before you begin
+
+The following tutorials demonstrate how to configure the Google Cloud Platform
+to run quantum simulations with qsim.
+
+You can use Google Cloud to run high-performance CPU-based simulations or
+GPU-based simulations, depending on your requirements. For more information
+about making a choice between CPU- and GPU-based simulations, see
+[Choosing hardware for your qsim simulation]().
+
+This tutorial depends on resources provided by the Google Cloud Platform.
+
+* **Ensure that you have a Google Cloud Platform project.** You can reuse an
+ existing project, or create a new one, from your
+ [project dashboard](https://console.cloud.google.com/projectselector2/home/dashboard).
+ * For more information about Google Cloud projects, see
+ [Creating and managing projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects)
+ in the Google Cloud documentation.
+* **Ensure that billing is enabled for your project.**
+ * For more information about billing, see
+ [Enable, disable, or change billing for a project](https://cloud.google.com/billing/docs/how-to/modify-project#enable-billing)
+ in the Google Cloud documentation.
+* **Estimate costs for your project** Use the
+ [Google Cloud Pricing Calculator](https://cloud.google.com/products/calculator)
+ to estimate the scale of the costs you might incur, based on your projected
+ usage. The resources that you use to simulate a quantum circuit on the
+ Google Cloud platform are billable.
+* **Enable the Compute Engine API for your project.** You can enable APIs from
+ the [API Library Console](https://console.cloud.google.com/apis/library). On
+ the console, in the search box, enter "compute engine api" to find the API
+ and click through to Enable it.
+ * For more information about enabling the Compute Engine API, see
+ [Getting Started](https://cloud.google.com/apis/docs/getting-started) in
+ the Google Cloud documentation.
diff --git a/docs/tutorials/gcp_cpu.md b/docs/tutorials/gcp_cpu.md
new file mode 100644
index 00000000..0795e622
--- /dev/null
+++ b/docs/tutorials/gcp_cpu.md
@@ -0,0 +1,243 @@
+# CPU-based quantum simulation on Google Cloud
+
+In this tutorial, you configure and test a virtual machine (VM) to run CPU-based
+quantum simulations. The configuration in this tutorial uses the qsim Docker
+container, running on a Google Compute Engine VM.
+
+## 1. Create a virtual machine
+
+Follow the instructions in the
+[Quickstart using a Linux VM](https://cloud.google.com/compute/docs/quickstart-linux)
+guide to create a VM. In addition to the guidance under the Create a Linux VM
+instance heading, ensure that your VM has the following properties:
+
+* In the **Machine Configuration** section:
+ 1. Select the tab for the **Compute Optimized** machine family.
+ 2. In the machine **Series** option, choose **C2**.
+ 3. In the **Machine type** option, choose **c2-standard-16**. This option
+ gives you 16 virtual CPUS and 64MB of RAM.
+ Note: This choice is for demonstration purposes only. For a live
+ experiment, see [Choosing hardware for your qsim
+ simulation](/qsim/choose_hw).
+* In the **Boot disk section**, click the **Change** button, and choose
+ **Container-Optimized** operating system. This overrides the seletion in
+ step 3 in [Create a Linux VM
+ instance](https://cloud.google.com/compute/docs/quickstart-linux#create_a_linux_vm_instance).
+* In the **Firewall** section, ensure that both the **Allow HTTP traffic**
+ checkbox and the **Allow HTTPS traffic** checkbox are selected.
+
+When Google Cloud finishes creating the VM, you can see your VM listed in the
+[Compute Instances dashboard](https://pantheon.corp.google.com/compute/instances)
+for your project.
+
+### Find out more
+* [Choosing the right machine family and
+ type](https://cloud.google.com/blog/products/compute/choose-the-right-google-compute-engine-machine-type-for-you)
+* [Container-Optimized OS
+ Overview](https://cloud.google.com/container-optimized-os/docs/concepts/features-and-benefits)
+
+## 2. Prepare your computer
+
+Use SSH to create an encrypted tunnel from your computer to your VM and redirect
+a local port to your VM over the tunnel.
+
+1. Install the `gcloud` command line tool. Follow the instructions in the
+ [Installing Cloud SDK](https://cloud.google.com/sdk/docs/install)
+ documentation.
+2. After installation, run the `gcloud init` command to initialize the Google
+ Cloud environment. You need to provide the `gcloud` tool with details
+ about your VM, such as the project name and the region where your VM is
+ located.
+ 1. You can verify your environment by using the `gcloud config list`
+ command.
+3. Create an SSH tunnel and redirect a local port to use the tunnel by typing
+ the following command in a terminal window on your computer. Replace
+ `[YOUR_INSTANCE_NAME]` with the name of your VM.
+
+ ```
+ gcloud compute ssh [YOUR_INSTANCE_NAME] -- -L 8888:localhost:8888
+ ```
+
+When the command completes successfully, your prompt changes from your local
+machine to your virtual machine.
+
+## 3. Start the qsim Docker container on your virtual machine
+
+1. On the VM that you just created, start the qsim container:
+
+ ```
+ docker run -v `pwd`:/homedir -p 8888:8888 gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest &
+ ```
+
+ If you see a `permission denied` error message, you might need to add `sudo`
+ before your Docker command. For more information about running Docker, see the
+ [`docker run` command reference](https://docs.docker.com/engine/reference/run/#general-form).
+
+2. Verify the output from Docker when as it downloads and starts the container.
+ The last few lines should be similar to the following output:
+
+ ```
+ To access the notebook, open this file in a browser:
+ file:///root/.local/share/jupyter/runtime/nbserver-1-open.html
+ Or copy and paste one of these URLs:
+ http://e1f7a7cca9fa:8888/?token=aa16e1b6d3f51c58928037d34cc6854dac47347dd4c0eae5
+ or http://127.0.0.1:8888/?token=aa16e1b6d3f51c58928037d34cc6854dac47347dd4c0eae5
+ ```
+
+3. Copy the URL in the last line of output from your console, and save it for
+ the next task.
+
+## 4. Connect to your virtual machine
+
+The easiest way to use your VM is through a notebook environment like
+[Google Colaboratory](https://colab.sandbox.google.com/notebooks/intro.ipynb?utm_source=scs-index#recent=true)
+(Colab). Google Colab is a free, hosted notebook environment that enables you to
+write, execute, and share Python code from your browser.
+
+However, the qsim Docker image also includes a Jupyter kernel and other
+command-line tools. These tools enable you to connect directly to your container
+and run your code.
+
+* {Colab}
+
+ You can write code in a Colab notebook, and use your VM to run your code. In
+ this scenario, we use the
+ [Get Started with qsimcirq Colab notebook](https://quantumai.google/qsim/tutorials/qsimcirq).
+
+ 1. Open the
+ [Get Started with qsimcirq notebook](https://quantumai.google/qsim/tutorials/qsimcirq).
+ 2. Click the **Connect** drop-down menu.
+ the Connect button to open the menu.
+ 3. Choose the **Connect to a local runtime** option to open the Local
+ connection settings window.
+
+ 4. In the **Backend URL** text field, paste the URL that you saved in
+ [task 3](#3_start_the_qsim_docker_container_on_your_virtual_machine).
+ 5. Change the part of your URL that read `127.0.0.1` to `localhost`.
+
+ 6. Click the **Connect** button in the Local connection settings window.
+
+ When your connection is ready, Colab displays a green checkmark beside the
+ Connected (Local) drop-down menu.
+
+
+
+ The code cells in your notebook now execute on your VM instead of your local
+ computer.
+
+* {Jupyter}
+
+ You can run your simulation directly in your Docker container, in Jupyter.
+ Jupyter runs in the qsim Docker container.
+
+ 1. Open a browser window.
+ 2. In the navigation bar, paste the URL that you copied in [task
+ 3](#3_start_the_qsim_docker_container_on_your_virtual_machine).
+ 3. In the browser you should now see the Jupyter UI, running on your VM.
+
+ The code that you execute here executes on your VM. You can navigate to qsim >
+ docs > tutorials to try other tutorials.
+
+* {Command line}
+
+ You can run a quantum simulation using qsim from the command line.
+ Your code runs in the qsim Docker container.
+
+ **Before you begin**
+
+ For this scenario, you can connect to your machine directly over SSH
+ rather than create a tunnel. In [task 2, step 3](#2_prepare_your_computer)
+ above, remove the second half of the command. Instead of this command:
+
+ ```
+ gcloud compute ssh [YOUR_INSTANCE_NAME] -- -L 8888:localhost:8888
+ ```
+
+ Run:
+
+ ```
+ gcloud compute ssh [YOUR_INSTANCE_NAME]
+ ```
+
+ Either command works for the purpose of this tutorial. Continue to task 4 then
+ complete the steps below, regardless of which command you use.
+
+ **1. Copy the container ID for your qsim Docker container**
+
+ Run `docker ps` to display the container ID. The output should look like
+ the following text:
+
+ ```
+ CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES i
+ 8ab217d640a3 gcr.io/quantum-291919/jupyter_qsim:latest "jupyter-notebook --…" 2 hours ago Up 2 hours 0.0.0.0:8888->8888/tcp dazzling_lovelace.
+ ```
+
+ In this case, the container ID is `8ab217d640a3`.
+
+ **2. Connect to your qsim Docker container**
+
+ Run `docker exec` to login to your container. Replace `[CONTAINER_ID]`
+ with the ID that you copied in step 1.
+
+ ```
+ docker exec -it [CONTAINER_ID] /bin/bash
+ ```
+
+ Your command prompt now executes commands in the container.
+
+ **3. Verify your installation**
+
+ You can use the code below to verify that qsim uses your qsim installation.
+ You can paste the code directly into the REPL, or paste the code in a
+ file.
+
+ ```
+ # Import Cirq and qsim
+ import cirq
+ import qsimcirq
+
+ # Instantiate qubits and create a circuit
+ q0, q1 = cirq.LineQubit.range(2)
+ circuit = cirq.Circuit(cirq.H(q0), cirq.CX(q0, q1))
+
+ # Instantiate a simulator that uses the GPU
+ qsim_simulator = qsimcirq.QSimSimulator()
+
+ # Run the simulation
+ print("Running simulation for the following circuit:")
+ print(circuit)
+
+ qsim_results = qsim_simulator.compute_amplitudes(
+ circuit, bitstrings=[0b00, 0b01])
+
+ print("qsim results:")
+ print(qsim_results)
+ ```
+
+ After a moment, you should see a result that looks similar to the following.
+
+ ```
+ [(0.7071067690849304+0j), 0j]
+ ```
+
+## Next steps
+
+After you finish, don't forget to stop or delete your VM on the Compute
+Instances dashboard to prevent further billing.
+
+You are now ready to run your own large simulations on Google Cloud. If you want
+to try a large circuit on Google Cloud, you can connect the
+[Simulate a large quantum circuit](https://colab.sandbox.google.com/github/quantumlib/qsim/blob/master/docs/tutorials/q32d14.ipynb)
+Colab notebook to your VM
+([documentation](https://quantumai.google/qsim/tutorials/q32d14)).
+
+For more information about managing your VM, see the following documentation
+from Google Cloud:
+
+* [Stopping and starting a VM](https://cloud.google.com/compute/docs/instances/stop-start-instance)
+* [Suspending and resuming an instance](https://cloud.google.com/compute/docs/instances/suspend-resume-instance)
+* [Deleting a VM instance](https://cloud.google.com/compute/docs/instances/deleting-instance)
+
+As an alternative to Google Cloud, you can download the Docker container or the
+qsim source code to run quantum simulations on your own high-performance
+computing platform.
diff --git a/docs/tutorials/gcp_gpu.md b/docs/tutorials/gcp_gpu.md
new file mode 100644
index 00000000..cf57e48d
--- /dev/null
+++ b/docs/tutorials/gcp_gpu.md
@@ -0,0 +1,209 @@
+# GPU-based quantum simulation on Google Cloud
+
+In this tutorial, you configure and test a virtual machine (VM) to run GPU-based
+quantum simulations on Google Cloud.
+
+Note: The later steps in this tutorial require you to enter several commands at the
+command line. Some commands might require you to add `sudo` before the command.
+For example, if a step asks you to type `icecream -fancy`, you might need to
+type `sudo icecream -fancy`.
+
+## 1. Create a virtual machine
+
+Follow the instructions in the
+[Quickstart using a Linux VM](https://cloud.google.com/compute/docs/quickstart-linux)
+guide to create a VM. In addition to the guidance specified in the Create a Linux VM
+instance section, ensure that your VM has the following properties:
+
+* In the **Machine Configuration** section:
+ 1. Select the tab for the **GPU** machine family.
+ 2. In the **GPU type** option, choose **NVIDIA Tesla A100**.
+ 3. In the **Number of GPUs** option, choose **1**.
+* In the **Boot disk** section, click the **Change** button:
+ 1. In the **Operating System** option, choose **Ubuntu**.
+ 2. In the **Version** option, choose **20.04 LTS**.
+ 3. In the **Size** field, enter **30** (minimum).
+* The instructions above override steps 3 through 5 in the [Create a Linux VM
+ instance](https://cloud.google.com/compute/docs/quickstart-linux)
+ Quickstart.
+* In the **Firewall** section, ensure that both the **Allow HTTP traffic**
+ checkbox and the **Allow HTTPS traffic** checkboxs are selected.
+
+When Google Cloud finishes creating the VM, you can see your VM listed in the
+[Compute Instances dashboard](https://pantheon.corp.google.com/compute/instances)
+for your project.
+
+### Find out more
+
+* [Choosing hardware for your qsim simulation](/qsim/choose_hw)
+* [Choosing the right machine family and type](https://cloud.google.com/blog/products/compute/choose-the-right-google-compute-engine-machine-type-for-you)
+* [Creating a VM with attached GPUs](https://cloud.google.com/compute/docs/gpus/create-vm-with-gpus#create-new-gpu-vm)
+
+## 2. Prepare your computer
+
+Use SSH in the `glcoud` tool to communicate with your VM.
+
+1. Install the `gcloud` command line tool. Follow the instructions in the
+ [Installing Cloud SDK](https://cloud.google.com/sdk/docs/install)
+ documentation.
+2. After installation, run the `gcloud init` command to initialize the Google
+ Cloud environment. You need to provide the `gcloud` tool with details
+ about your VM, such as the project name and the region where your VM is
+ located.
+ 1. You can verify your environment by using the `gcloud config list`
+ command.
+3. Connect to your VM by using SSH. Replace `[YOUR_INSTANCE_NAME]` with the
+ name of your VM.
+
+ ```shell
+ gcloud compute ssh [YOUR_INSTANCE_NAME]
+ ```
+
+When the command completes successfully, your prompt changes from your local
+machine to your virtual machine.
+
+## 3. Enable your virtual machine to use the GPU
+
+1. Install the GPU driver. Complete the steps provided in the following
+ sections of the [Installing GPU
+ drivers](https://cloud.google.com/compute/docs/gpus/install-drivers-gpu):
+ guide:
+ * [Examples](https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#examples),
+ under the **Ubuntu** tab. For step 3, only perform the steps for
+ **Ubuntu 20.04** (steps 3a through 3f).
+ * [Verifying the GPU driver install](https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#verify-driver-install)
+2. Install the CUDA toolkit.
+
+ ```shell
+ sudo apt install -y nvidia-cuda-toolkit
+ ```
+
+3. Add your CUDA toolkit to the environment search path.
+ 1. Discover the directory of the CUDA toolkit that you installed.
+
+ ```shell
+ ls /usr/local
+ ```
+
+ The toolkit is the highest number that looks like the pattern
+ `cuda-XX.Y`. The output of the command should resemble the
+ following:
+
+ ```shell
+ bin cuda cuda-11 cuda-11.4 etc games include lib man sbin share src
+ ```
+
+ In this case, the directory is `cuda-11.4`.
+ 2. Add the CUDA toolkit path to your environment. You can run the following
+ command to append the path to your `~/.bashrc` file. Replace `[DIR]`
+ with the CUDA directory that you discovered in the previous step.
+
+ ```shell
+ echo "export PATH=/usr/local/[DIR]/bin${PATH:+:${PATH}}" >> ~/.bashrc
+ ```
+
+ 3. Run `source ~/.bashrc` to activate the new environment search path
+
+## 4. Install build tools
+
+Install the tools required to build qsim. This step might take a few minutes to
+complete.
+
+```shell
+sudo apt install cmake && sudo apt install pip && pip install pybind11
+```
+
+
+## 5. Create a GPU-enabled version of qsim
+
+1. Clone the qsim repository.
+
+ ```shell
+ git clone https://github.com/quantumlib/qsim.git
+ ```
+
+2. Run `cd qsim` to change your working directory to qsim.
+3. Run `make` to compile qsim. When make detects the CUDA toolkit during
+ compilation, make builds the GPU version of qsim automatically.
+4. Run `pip install .` to install your local version of qsimcirq.
+5. Verify your qsim installation.
+
+ ```shell
+ python3 -c "import qsimcirq; print(qsimcirq.qsim_gpu)"
+ ```
+
+ If the installation completed successfully, the output from the command
+ should resemble the following:
+
+ ```none
+
+ ```
+
+
+## 6. Verify your installation
+
+You can use the following code to verify that qsim uses your GPU. You can paste
+the code directly into the REPL, or paste the code in a file.
+
+```
+# Import Cirq and qsim
+import cirq
+import qsimcirq
+
+# Instantiate qubits and create a circuit
+q0, q1 = cirq.LineQubit.range(2)
+circuit = cirq.Circuit(cirq.H(q0), cirq.CX(q0, q1))
+
+# Instantiate a simulator that uses the GPU
+gpu_options = qsimcirq.QSimOptions(use_gpu=True)
+qsim_simulator = qsimcirq.QSimSimulator(qsim_options=gpu_options)
+
+# Run the simulation
+print("Running simulation for the following circuit:")
+print(circuit)
+
+qsim_results = qsim_simulator.compute_amplitudes(
+ circuit, bitstrings=[0b00, 0b01])
+
+print("qsim results:")
+print(qsim_results)
+```
+
+After a moment, you should see a result that looks similar to the following.
+
+```none
+[(0.7071067690849304+0j), 0j]
+```
+
+### Optional: Use the NVIDIA cuQuantum SDK
+
+If you have the [NVIDIA cuQuantum SDK](https://developer.nvidia.com/cuquantum-sdk)
+installed (instructions are provided
+[here](https://docs.nvidia.com/cuda/cuquantum/custatevec/html/getting_started.html#installation-and-compilation),
+cuStateVec v1.0.0 or higher is required),
+you can use it with this tutorial. Before building qsim in step 5,
+set the `CUQUANTUM_DIR` environment variable from the command line:
+
+```bash
+export CUQUANTUM_DIR=[PATH_TO_CUQUANTUM_SDK]
+```
+
+Once you have built qsim, modify the `gpu_options` line like so:
+
+```python
+gpu_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=1)
+```
+
+This instructs qsim to make use of its cuQuantum integration, which provides
+improved performance on NVIDIA GPUs. If you experience issues with this
+option, please file an issue on the qsim repository.
+
+
+## Next steps
+
+After you finish, don't forget to stop or delete your VM on the Compute
+Instances dashboard to prevent further billing.
+
+You are now ready to run your own large simulations on Google Cloud. For sample
+code of a large circuit, see the [Simulate a large
+circuit](https://quantumai.google/qsim/tutorials/q32d14) tutorial.
diff --git a/docs/tutorials/multinode.md b/docs/tutorials/multinode.md
new file mode 100644
index 00000000..8d425cc2
--- /dev/null
+++ b/docs/tutorials/multinode.md
@@ -0,0 +1,333 @@
+# Multinode quantum simulation using HTCondor on GCP
+
+In this tutorial, you will configure HTCondor to run multiple simulations of a
+quantum circuit in parallel across multiple nodes. This method can be used to
+accelerate Monte Carlo simulations of noisy quantum circuits.
+
+Objectives of this tutorial:
+
+* Use `terraform` to deploy a HTCondor cluster
+* Run a multinode simulation using HTCondor
+* Query cluster information and monitor running jobs in HTCondor
+* Use `terraform` to destroy the cluster
+
+## 1. Configure your environment
+
+Although this tutorial can be run from your local computer, we recommend the use
+of [Google Cloud Shell](https://cloud.google.com/shell). Cloud Shell has many useful tools pre-installed.
+
+Once you have completed the [Before you begin](./gcp_before_you_begin.md)
+tutorial, open the [Cloud Shell in the Cloud Console](https://console.cloud.google.com/home/dashboard?cloudshell=true).
+
+### Clone this repo
+
+In your Cloud Shell window, clone this Github repo.
+
+``` bash
+git clone https://github.com/quantumlib/qsim.git
+```
+
+If you get an error saying something like `qsim already exists`, you may need
+to delete the `qsim` directory with `rm -rf qsim` and rerun the clone command.
+
+### Change directory
+
+Change directory to the tutorial:
+
+``` bash
+cd qsim/docs/tutorials/multinode/terraform
+```
+
+This is where you will use `terraform` to create the HTCondor cluster required to run your jobs.
+
+### Edit `init.sh` file to match your environment
+
+Using your favorite text file editor, open the `init.sh` file. The first few
+lines should look like this:
+
+```bash
+# ---- Edit below -----#
+
+export TF_VAR_project=[USER_PROJECT]
+export TF_VAR_zone=us-east4-c
+export TF_VAR_region=us-east4
+```
+
+Replace `[USER_PROJECT]` with the project name you chose on the
+`Before you begin` page.
+
+The other lines can optionally be modified to adjust your environment.
+* The `TF_VAR_zone` and `TF_VAR_region` lines can be modified to select where
+your project will create new jobs.
+
+#### Find out more
+
+* [Choosing a zone and region](https://cloud.google.com/compute/docs/regions-zones)
+
+### Source the `init.sh` file
+
+The edited `init.sh` file should be "sourced" in the cloud shell:
+
+``` bash
+source init.sh
+```
+
+Respond `Agree` to any pop-ups that request permissions on the Google Cloud platform.
+
+The final outcome of this script will include:
+
+* A gcloud config setup correctly
+* A service account created
+* The appropriate permissions assigned to the service account
+* A key file created to enable the use of Google Cloud automation.
+
+This will take up to 60 seconds. At the end you will see output about
+permissions and the configuration of the account.
+
+## 2. Run terraform
+
+After the previous steps are completed, you can initialize `terraform` to begin
+your cluster creation. The first step is to initialize the `terraform` state.
+``` bash
+terraform init
+```
+A successful result will contain the text:
+```
+Terraform has been successfully initialized!
+```
+
+### Run the `make` command
+
+For convenience, some terraform commands are prepared in a `Makefile`. This
+means you can now create your cluster, with a simple `make` command.
+
+```bash
+make apply
+```
+
+A successful run will show:
+
+```
+Apply complete! Resources: 4 added, 0 changed, 0 destroyed.
+```
+
+## 3. Connect to the submit node for HTCondor
+
+Although there are ways to run HTCondor commands from your local machine,
+the normal path is to login to the submit node. From there you can run
+commands to submit and monitor jobs on HTCondor.
+
+### List VMs that were created by HTCondor
+
+To see the VMs created by HTCondor, run:
+
+```bash
+gcloud compute instances list
+```
+
+At this point in the tutorial, you will see two instances listed:
+
+```
+NAME: c-manager
+ZONE: us-central1-a
+MACHINE_TYPE: n1-standard-1
+PREEMPTIBLE:
+INTERNAL_IP: X.X.X.X
+EXTERNAL_IP: X.X.X.X
+STATUS: RUNNING
+
+NAME: c-submit
+ZONE: us-central1-a
+MACHINE_TYPE: n1-standard-1
+PREEMPTIBLE:
+INTERNAL_IP: X.X.X.X
+EXTERNAL_IP: X.X.X.X
+STATUS: RUNNING
+```
+
+### Connecting to the submit node
+
+To connect to the submit node, click the `Compute Engine` item on the Cloud
+dashboard. This will open the VM Instances page, where you should see the two
+instances listed above. In the `c-submit` row, click on the `SSH` button to
+open a new window connected to the submit node. During this step, you may see a
+prompt that reads `Connection via Cloud Identity-Aware Proxy Failed`; simply
+click on `Connect without Identity-Aware Proxy` and the connection should
+complete.
+
+This new window is logged into your HTCondor cluster. You will see a command
+prompt that looks something like this:
+
+```bash
+[mylogin@c-submit ~]$
+```
+
+The following steps should be performed in this window.
+
+### Checking the status
+
+You can run `condor_q` to verify if the HTCondor install is completed. The output should look something like this:
+
+```
+-- Schedd: c-submit.c.quantum-htcondor-14.internal : <10.150.0.2:9618?... @ 08/18/21 18:37:50
+OWNER BATCH_NAME SUBMITTED DONE RUN IDLE HOLD TOTAL JOB_IDS
+
+Total for query: 0 jobs; 0 completed, 0 removed, 0 idle, 0 running, 0 held, 0 suspended
+Total for drj: 0 jobs; 0 completed, 0 removed, 0 idle, 0 running, 0 held, 0 suspended
+Total for all users: 0 jobs; 0 completed, 0 removed, 0 idle, 0 running, 0 held, 0 suspended
+```
+
+If you get `command not found`, you will need to wait a few minutes for the HTCondor install to complete.
+
+## 4. Get the sample code and run it
+
+The HTCondor cluster is now ready for your jobs to be run. For this tutorial,
+sample jobs have been provided in the Github repo.
+
+### Clone the repo on your cluster
+
+On the submit node, you can clone the repo to get access to previously
+created submission files:
+
+```bash
+git clone https://github.com/quantumlib/qsim.git
+```
+
+Then cd to the tutorial directory.
+
+```bash
+cd qsim/docs/tutorials/multinode
+```
+
+### Submit a job
+
+Now it is possible to submit a job:
+```
+condor_submit noiseless.sub
+```
+This job will run the code in `noiseless3.py`, which executes a simple circuit and prints the results as a histogram. If successful, the output will be:
+```
+Submitting job(s).
+1 job(s) submitted to cluster 1.
+```
+You can see the job in queue with the `condor_q` command.
+
+The job will take several minutes to finish. The time includes creating a VM
+compute node, installing the HTCondor system and running the job. When complete, the following files will be stored in the `out` directory:
+
+* `out/log.1-0` contains a progress log for the job as it executes.
+* `out/out.1-0` contains the final output of the job.
+* `out/err.1-0` contains any error reports. This should be empty.
+
+To view one of these files in the shell, you can run `cat out/[FILE]`,
+replacing `[FILE]` with the name of the file to be viewed.
+
+## 5. Run multinode noise simulations
+
+Noise simulations make use of a [Monte Carlo
+method](https://en.wikipedia.org/wiki/Monte_Carlo_method) for [quantum
+trajectories](https://en.wikipedia.org/wiki/Quantum_Trajectory_Theory).
+
+### The noise.sub file
+
+To run multiple simulations, you can define a "submit" file. `noise.sub` is
+an example of this file format, and is shown below. Notable features include:
+
+* `universe = docker` means that all jobs will run inside a `docker` container.
+* `queue 50` submits 50 separate copies of the job.
+
+```
+universe = docker
+docker_image = gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest
+arguments = python3 noise3.py
+should_transfer_files = YES
+transfer_input_files = noise3.py
+when_to_transfer_output = ON_EXIT
+output = out/out.$(Cluster)-$(Process)
+error = out/err.$(Cluster)-$(Process)
+log = out/log.$(Cluster)-$(Process)
+request_memory = 10GB
+queue 50
+```
+The job can be submitted with the `condor_submit` command.
+```
+condor_submit noise.sub
+```
+The output should look like this:
+```
+Submitting job(s)..................................................
+50 job(s) submitted to cluster 2.
+```
+To monitor the ongoing process of jobs running, you can take advantage of the
+Linux `watch` command to run `condor_q` repeatedly:
+```
+watch "condor_q; condor_status"
+```
+The output of this command will show you the jobs in the queue as well as the
+VMs being created to run the jobs. There is a limit of 20 VMs for this
+configuration of the cluster.
+
+When the queue is empty, the command can be stopped with CTRL-C.
+
+The output from all trajectories will be stored in the `out` directory. To see
+the results of all simulations together, you can run:
+```
+cat out/out.2-*
+```
+The output should look something like this:
+```
+Counter({3: 462, 0: 452, 2: 50, 1: 36})
+Counter({0: 475, 3: 435, 1: 49, 2: 41})
+Counter({0: 450, 3: 440, 1: 59, 2: 51})
+Counter({0: 459, 3: 453, 2: 51, 1: 37})
+Counter({3: 471, 0: 450, 2: 46, 1: 33})
+Counter({3: 467, 0: 441, 1: 54, 2: 38})
+Counter({3: 455, 0: 455, 1: 50, 2: 40})
+Counter({3: 466, 0: 442, 2: 51, 1: 41})
+.
+.
+.
+```
+
+## 6. Shutting down
+
+**IMPORTANT**: To avoid excess billing for this project, it is important to
+shut down the cluster. Return to the Cloud dashboard window for the steps below.
+
+If your Cloud Shell is still open, simply run:
+```
+make destroy
+```
+If your Cloud Shell closed at any point, you'll need to re-initialize it.
+[Open a new shell](https://console.cloud.google.com/home/dashboard?cloudshell=true)
+and run:
+```
+cd qsim/docs/tutorials/multinode/terraform
+source init.sh
+make destroy
+```
+After these commands complete, check the Compute Instances dashboard to verify
+that all VMs have been shut down. This tutorial makes use of an experimental
+[autoscaling script](./terraform/htcondor/autoscaler.py) to bring up and turn
+down VMs as needed. If any VMs remain after several minutes, you may need to
+shut them down manually, as described in the next section.
+
+## Next steps
+
+The file being run in the previous example was `noise3.py`. To run your own
+simulations, simply create a new python file with your circuit and change the
+`noise3.py` references in `noise.sub` to point to the new file.
+
+A detailed discussion of how to construct various types of noise in Cirq can be
+found [here](https://quantumai.google/cirq/noise).
+
+For more information about managing your VMs, see the following documentation
+from Google Cloud:
+
+* [Stopping and starting a VM](https://cloud.google.com/compute/docs/instances/stop-start-instance)
+* [Suspending and resuming an instance](https://cloud.google.com/compute/docs/instances/suspend-resume-instance)
+* [Deleting a VM instance](https://cloud.google.com/compute/docs/instances/deleting-instance)
+
+As an alternative to Google Cloud, you can download the Docker container or the
+qsim source code to run quantum simulations on your own high-performance
+computing platform.
diff --git a/docs/tutorials/multinode/noise.sub b/docs/tutorials/multinode/noise.sub
new file mode 100644
index 00000000..cccc85b0
--- /dev/null
+++ b/docs/tutorials/multinode/noise.sub
@@ -0,0 +1,11 @@
+universe = docker
+docker_image = gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest
+arguments = python3 noise3.py
+should_transfer_files = YES
+transfer_input_files = noise3.py
+when_to_transfer_output = ON_EXIT
+output = out/out.$(Cluster)-$(Process)
+error = out/err.$(Cluster)-$(Process)
+log = out/log.$(Cluster)-$(Process)
+request_memory = 10GB
+queue 50
\ No newline at end of file
diff --git a/docs/tutorials/multinode/noise3.py b/docs/tutorials/multinode/noise3.py
new file mode 100644
index 00000000..f3abbdd5
--- /dev/null
+++ b/docs/tutorials/multinode/noise3.py
@@ -0,0 +1,18 @@
+import cirq, qsimcirq
+
+# Create a Bell state, |00) + |11)
+q0, q1 = cirq.LineQubit.range(2)
+circuit = cirq.Circuit(cirq.H(q0), cirq.CNOT(q0, q1), cirq.measure(q0, q1, key="m"))
+
+# Constructs a noise model that adds depolarizing noise after each gate.
+noise = cirq.NoiseModel.from_noise_model_like(cirq.depolarize(p=0.05))
+
+# Use the noise model to create a noisy circuit.
+noisy_circuit = cirq.Circuit(noise.noisy_moments(circuit, system_qubits=[q0, q1]))
+
+sim = qsimcirq.QSimSimulator()
+result = sim.run(noisy_circuit, repetitions=1000)
+# Outputs a histogram dict of result:count pairs.
+# Expected result is a bunch of 0s and 3s, with fewer 1s and 2s.
+# (For comparison, the noiseless circuit will only have 0s and 3s)
+print(result.histogram(key="m"))
diff --git a/docs/tutorials/multinode/noiseless.sub b/docs/tutorials/multinode/noiseless.sub
new file mode 100644
index 00000000..f4bde2c8
--- /dev/null
+++ b/docs/tutorials/multinode/noiseless.sub
@@ -0,0 +1,11 @@
+universe = docker
+docker_image = gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest
+arguments = python3 noiseless3.py
+should_transfer_files = YES
+transfer_input_files = noiseless3.py
+when_to_transfer_output = ON_EXIT
+output = out/out.$(Cluster)-$(Process)
+error = out/err.$(Cluster)-$(Process)
+log = out/log.$(Cluster)-$(Process)
+request_memory = 10GB
+queue 1
\ No newline at end of file
diff --git a/docs/tutorials/multinode/noiseless3.py b/docs/tutorials/multinode/noiseless3.py
new file mode 100644
index 00000000..f35dcb5c
--- /dev/null
+++ b/docs/tutorials/multinode/noiseless3.py
@@ -0,0 +1,11 @@
+import cirq, qsimcirq
+
+# Create a Bell state, |00) + |11)
+q0, q1 = cirq.LineQubit.range(2)
+circuit = cirq.Circuit(cirq.H(q0), cirq.CNOT(q0, q1), cirq.measure(q0, q1, key="m"))
+
+sim = qsimcirq.QSimSimulator()
+result = sim.run(circuit, repetitions=1000)
+# Outputs a histogram dict of result:count pairs.
+# Expected result is a bunch of 0s and 3s, with no 1s or 2s.
+print(result.histogram(key="m"))
diff --git a/docs/tutorials/multinode/out/placeholder b/docs/tutorials/multinode/out/placeholder
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/tutorials/multinode/terraform/Makefile b/docs/tutorials/multinode/terraform/Makefile
new file mode 100644
index 00000000..ebd5018d
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/Makefile
@@ -0,0 +1,7 @@
+.PHONY: apply destroy
+
+apply:
+ terraform apply -auto-approve
+
+destroy:
+ terraform destroy -auto-approve
diff --git a/docs/tutorials/multinode/terraform/README.md b/docs/tutorials/multinode/terraform/README.md
new file mode 100644
index 00000000..094f1ee8
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/README.md
@@ -0,0 +1,3 @@
+# Multinode quantum simulation using HTCondor on GCP
+
+Please refer to the [README in the parent directory](../README.md).
\ No newline at end of file
diff --git a/docs/tutorials/multinode/terraform/htcondor/autoscaler.py b/docs/tutorials/multinode/terraform/htcondor/autoscaler.py
new file mode 100644
index 00000000..7795d8c9
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/htcondor/autoscaler.py
@@ -0,0 +1,378 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script for resizing managed instance group (MIG) cluster size based
+# on the number of jobs in the Condor Queue.
+
+from absl import app
+from absl import flags
+from pprint import pprint
+from googleapiclient import discovery
+from oauth2client.client import GoogleCredentials
+
+import os
+import math
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--p", required=True, help="Project id", type=str)
+parser.add_argument(
+ "--z",
+ required=True,
+ help="Name of GCP zone where the managed instance group is located",
+ type=str,
+)
+parser.add_argument(
+ "--r",
+ required=True,
+ help="Name of GCP region where the managed instance group is located",
+ type=str,
+)
+parser.add_argument(
+ "--mz",
+ required=False,
+ help="Enabled multizone (regional) managed instance group",
+ action="store_true",
+)
+parser.add_argument(
+ "--g", required=True, help="Name of the managed instance group", type=str
+)
+parser.add_argument(
+ "--c", required=True, help="Maximum number of compute instances", type=int
+)
+parser.add_argument(
+ "--v",
+ default=0,
+ help="Increase output verbosity. 1-show basic debug info. 2-show detail debug info",
+ type=int,
+ choices=[0, 1, 2],
+)
+parser.add_argument(
+ "--d",
+ default=0,
+ help="Dry Run, default=0, if 1, then no scaling actions",
+ type=int,
+ choices=[0, 1],
+)
+
+
+args = parser.parse_args()
+
+
+class AutoScaler:
+ def __init__(self, multizone=False):
+
+ self.multizone = multizone
+ # Obtain credentials
+ self.credentials = GoogleCredentials.get_application_default()
+ self.service = discovery.build("compute", "v1", credentials=self.credentials)
+
+ if self.multizone:
+ self.instanceGroupManagers = self.service.regionInstanceGroupManagers()
+ else:
+ self.instanceGroupManagers = self.service.instanceGroupManagers()
+
+ # Remove specified instance from MIG and decrease MIG size
+ def deleteFromMig(self, instance):
+ instanceUrl = "https://www.googleapis.com/compute/v1/projects/" + self.project
+ if self.multizone:
+ instanceUrl += "/regions/" + self.region
+ else:
+ instanceUrl += "/zones/" + self.zone
+ instanceUrl += "/instances/" + instance
+
+ instances_to_delete = {"instances": [instanceUrl]}
+
+ requestDelInstance = self.instanceGroupManagers.deleteInstances(
+ project=self.project,
+ **self.zoneargs,
+ instanceGroupManager=self.instance_group_manager,
+ body=instances_to_delete,
+ )
+
+ # execute if not a dry-run
+ if not self.dryrun:
+ response = requestDelInstance.execute()
+ if self.debug > 0:
+ print("Request to delete instance " + instance)
+ pprint(response)
+ return response
+ return "Dry Run"
+
+ def getInstanceTemplateInfo(self):
+ requestTemplateName = self.instanceGroupManagers.get(
+ project=self.project,
+ **self.zoneargs,
+ instanceGroupManager=self.instance_group_manager,
+ fields="instanceTemplate",
+ )
+ responseTemplateName = requestTemplateName.execute()
+ template_name = ""
+
+ if self.debug > 1:
+ print("Request for the template name")
+ pprint(responseTemplateName)
+
+ if len(responseTemplateName) > 0:
+ template_url = responseTemplateName.get("instanceTemplate")
+ template_url_partitioned = template_url.split("/")
+ template_name = template_url_partitioned[len(template_url_partitioned) - 1]
+
+ requestInstanceTemplate = self.service.instanceTemplates().get(
+ project=self.project, instanceTemplate=template_name, fields="properties"
+ )
+ responseInstanceTemplateInfo = requestInstanceTemplate.execute()
+
+ if self.debug > 1:
+ print("Template information")
+ pprint(responseInstanceTemplateInfo["properties"])
+
+ machine_type = responseInstanceTemplateInfo["properties"]["machineType"]
+ is_preemtible = responseInstanceTemplateInfo["properties"]["scheduling"][
+ "preemptible"
+ ]
+ if self.debug > 0:
+ print("Machine Type: " + machine_type)
+ print("Is preemtible: " + str(is_preemtible))
+ request = self.service.machineTypes().get(
+ project=self.project, zone=self.zone, machineType=machine_type
+ )
+ response = request.execute()
+ guest_cpus = response["guestCpus"]
+ if self.debug > 1:
+ print("Machine information")
+ pprint(responseInstanceTemplateInfo["properties"])
+ if self.debug > 0:
+ print("Guest CPUs: " + str(guest_cpus))
+
+ instanceTemlateInfo = {
+ "machine_type": machine_type,
+ "is_preemtible": is_preemtible,
+ "guest_cpus": guest_cpus,
+ }
+ return instanceTemlateInfo
+
+ def scale(self):
+ # diagnosis
+ if self.debug > 1:
+ print("Launching autoscaler.py with the following arguments:")
+ print("project_id: " + self.project)
+ print("zone: " + self.zone)
+ print("region: " + self.region)
+ print(f"multizone: {self.multizone}")
+ print("group_manager: " + self.instance_group_manager)
+ print("computeinstancelimit: " + str(self.compute_instance_limit))
+ print("debuglevel: " + str(self.debug))
+
+ if self.multizone:
+ self.zoneargs = {"region": self.region}
+ else:
+ self.zoneargs = {"zone": self.zone}
+
+ # Get total number of jobs in the queue that includes number of jos waiting as well as number of jobs already assigned to nodes
+ queue_length_req = (
+ 'condor_q -totals -format "%d " Jobs -format "%d " Idle -format "%d " Held'
+ )
+ queue_length_resp = os.popen(queue_length_req).read().split()
+
+ if len(queue_length_resp) > 1:
+ queue = int(queue_length_resp[0])
+ idle_jobs = int(queue_length_resp[1])
+ on_hold_jobs = int(queue_length_resp[2])
+ else:
+ queue = 0
+ idle_jobs = 0
+ on_hold_jobs = 0
+
+ print("Total queue length: " + str(queue))
+ print("Idle jobs: " + str(idle_jobs))
+ print("Jobs on hold: " + str(on_hold_jobs))
+
+ instanceTemlateInfo = self.getInstanceTemplateInfo()
+ if self.debug > 1:
+ print("Information about the compute instance template")
+ pprint(instanceTemlateInfo)
+
+ self.cores_per_node = instanceTemlateInfo["guest_cpus"]
+ print("Number of CPU per compute node: " + str(self.cores_per_node))
+
+ # Get state for for all jobs in Condor
+ name_req = "condor_status -af name state"
+ slot_names = os.popen(name_req).read().splitlines()
+ if self.debug > 1:
+ print("Currently running jobs in Condor")
+ print(slot_names)
+
+ # Adjust current queue length by the number of jos that are on-hold
+ queue -= on_hold_jobs
+ if on_hold_jobs > 0:
+ print("Adjusted queue length: " + str(queue))
+
+ # Calculate number instances to satisfy current job queue length
+ if queue > 0:
+ self.size = int(math.ceil(float(queue) / float(self.cores_per_node)))
+ if self.debug > 0:
+ print(
+ "Calucalting size of MIG: ⌈"
+ + str(queue)
+ + "/"
+ + str(self.cores_per_node)
+ + "⌉ = "
+ + str(self.size)
+ )
+ else:
+ self.size = 0
+
+ # If compute instance limit is specified, can not start more instances then specified in the limit
+ if self.compute_instance_limit > 0 and self.size > self.compute_instance_limit:
+ self.size = self.compute_instance_limit
+ print(
+ "MIG target size will be limited by " + str(self.compute_instance_limit)
+ )
+
+ print("New MIG target size: " + str(self.size))
+
+ # Get current number of instances in the MIG
+ requestGroupInfo = self.instanceGroupManagers.get(
+ project=self.project,
+ **self.zoneargs,
+ instanceGroupManager=self.instance_group_manager,
+ )
+ responseGroupInfo = requestGroupInfo.execute()
+ currentTarget = int(responseGroupInfo["targetSize"])
+ print("Current MIG target size: " + str(currentTarget))
+
+ if self.debug > 1:
+ print("MIG Information:")
+ print(responseGroupInfo)
+
+ if self.size == 0 and currentTarget == 0:
+ print(
+ "No jobs in the queue and no compute instances running. Nothing to do"
+ )
+ exit()
+
+ if self.size == currentTarget:
+ print(
+ "Running correct number of compute nodes to handle number of jobs in the queue"
+ )
+ exit()
+
+ if self.size < currentTarget:
+ print("Scaling down. Looking for nodes that can be shut down")
+ # Find nodes that are not busy (all slots showing status as "Unclaimed")
+
+ node_busy = {}
+ for slot_name in slot_names:
+ name_status = slot_name.split()
+ if len(name_status) > 1:
+ name = name_status[0]
+ status = name_status[1]
+ slot = "NO-SLOT"
+ slot_server = name.split("@")
+ if len(slot_server) > 1:
+ slot = slot_server[0]
+ server = slot_server[1].split(".")[0]
+ else:
+ server = slot_server[0].split(".")[0]
+
+ if self.debug > 0:
+ print(slot + ", " + server + ", " + status + "\n")
+
+ if server not in node_busy:
+ if status == "Unclaimed":
+ node_busy[server] = False
+ else:
+ node_busy[server] = True
+ else:
+ if status != "Unclaimed":
+ node_busy[server] = True
+
+ if self.debug > 1:
+ print("Compuute node busy status:")
+ print(node_busy)
+
+ # Shut down nodes that are not busy
+ for node in node_busy:
+ if not node_busy[node]:
+ print("Will shut down: " + node + " ...")
+ respDel = self.deleteFromMig(node)
+ if self.debug > 1:
+ print("Shut down request for compute node " + node)
+ pprint(respDel)
+
+ if self.debug > 1:
+ print("Scaling down complete")
+
+ if self.size > currentTarget:
+ print(
+ "Scaling up. Need to increase number of instances to " + str(self.size)
+ )
+ # Request to resize
+ request = self.instanceGroupManagers.resize(
+ project=self.project,
+ **self.zoneargs,
+ instanceGroupManager=self.instance_group_manager,
+ size=self.size,
+ )
+ response = request.execute()
+ if self.debug > 1:
+ print("Requesting to increase MIG size")
+ pprint(response)
+ print("Scaling up complete")
+
+
+def main():
+
+ scaler = AutoScaler(args.mz)
+
+ # Project ID
+ scaler.project = args.p # Ex:'slurm-var-demo'
+
+ # Name of the zone where the managed instance group is located
+ scaler.zone = args.z # Ex: 'us-central1-f'
+
+ # Name of the region where the managed instance group is located
+ scaler.region = args.r # Ex: 'us-central1'
+
+ # The name of the managed instance group.
+ scaler.instance_group_manager = args.g # Ex: 'condor-compute-igm'
+
+ # Default number of cores per intance, will be replaced with actual value
+ scaler.cores_per_node = 4
+
+ # Default number of running instances that the managed instance group should maintain at any given time. This number will go up and down based on the load (number of jobs in the queue)
+ scaler.size = 0
+
+ # Dry run: : 0, run scaling; 1, only provide info.
+ scaler.dryrun = args.d > 0
+
+ # Debug level: 1-print debug information, 2 - print detail debug information
+ scaler.debug = 0
+ if args.v:
+ scaler.debug = args.v
+
+ # Limit for the maximum number of compute instance. If zero (default setting), no limit will be enforced by the script
+ scaler.compute_instance_limit = 0
+ if args.c:
+ scaler.compute_instance_limit = abs(args.c)
+
+ scaler.scale()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/tutorials/multinode/terraform/htcondor/noise.py b/docs/tutorials/multinode/terraform/htcondor/noise.py
new file mode 100644
index 00000000..b5542959
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/htcondor/noise.py
@@ -0,0 +1,20 @@
+#!/usr/bin/python3
+
+import cirq, qsimcirq
+
+# Create a Bell state, |00) + |11)
+q0, q1 = cirq.LineQubit.range(2)
+circuit = cirq.Circuit(cirq.H(q0), cirq.CNOT(q0, q1), cirq.measure(q0, q1, key="m"))
+
+# Constructs a noise model that adds depolarizing noise after each gate.
+noise = cirq.NoiseModel.from_noise_model_like(cirq.depolarize(p=0.05))
+
+# Use the noise model to create a noisy circuit.
+noisy_circuit = cirq.Circuit(noise.noisy_moments(circuit, system_qubits=[q0, q1]))
+
+sim = qsimcirq.QSimSimulator()
+result = sim.run(noisy_circuit, repetitions=1000)
+# Outputs a histogram dict of result:count pairs.
+# Expected result is a bunch of 0s and 3s, with fewer 1s and 2s.
+# (For comparison, the noiseless circuit will only have 0s and 3s)
+print(result.histogram(key="m"))
diff --git a/docs/tutorials/multinode/terraform/htcondor/noise.sub b/docs/tutorials/multinode/terraform/htcondor/noise.sub
new file mode 100644
index 00000000..aa5fff28
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/htcondor/noise.sub
@@ -0,0 +1,12 @@
+universe = docker
+docker_image = gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim
+executable = /usr/bin/python3
+arguments = noise.py
+transfer_input_files = noise.py
+should_transfer_files = YES
+when_to_transfer_output = ON_EXIT
+output = out/out.$(Cluster)-$(Process)
+error = out/err.$(Cluster)-$(Process)
+log = out/log.$(Cluster)-$(Process)
+request_memory = 1GB
+queue 100
diff --git a/docs/tutorials/multinode/terraform/htcondor/resources.tf b/docs/tutorials/multinode/terraform/htcondor/resources.tf
new file mode 100644
index 00000000..0b4212d9
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/htcondor/resources.tf
@@ -0,0 +1,384 @@
+variable "cluster_name" {
+ type = string
+ default = "condor"
+}
+variable "admin_email" {
+ type = string
+ default = ""
+}
+variable "osversion" {
+ type = string
+ default = "7"
+}
+variable "osimage" {
+ type = string
+ default = "hpc-centos-7"
+}
+variable "osproject" {
+ type = string
+ default = "cloud-hpc-image-public"
+}
+variable "condorversion" {
+ type = string
+ default = ""
+}
+variable "project" {
+ type = string
+}
+variable "zone" {
+ type = string
+}
+variable "region" {
+ type = string
+}
+variable "numzones" {
+ type = string
+}
+variable "multizone" {
+ type = bool
+}
+variable "min_replicas" {
+ type = number
+ default = 0
+}
+variable "max_replicas" {
+ type = number
+ default = 20
+}
+variable "use_preemptibles" {
+ type = bool
+ default = true
+}
+variable "metric_target_loadavg" {
+ type = number
+ default = "1.0"
+}
+variable "metric_target_queue" {
+ type = number
+ default = 10
+}
+variable "compute_instance_type" {
+ type = string
+ default = "n1-standard-1"
+}
+variable "instance_type" {
+ type = string
+ default = "n1-standard-1"
+}
+variable "service_account" {
+ type = string
+ default = "default"
+}
+locals{
+ autoscaler = file("${path.module}/autoscaler.py")
+ compute_startup = templatefile(
+ "${path.module}/startup-centos.sh",
+ {
+ "project" = var.project,
+ "cluster_name" = var.cluster_name,
+ "htserver_type" = "compute",
+ "osversion" = var.osversion,
+ "zone" = var.zone,
+ "region" = var.region,
+ "multizone" = var.multizone,
+ "condorversion" = var.condorversion,
+ "max_replicas" = var.max_replicas,
+ "autoscaler" = "",
+ "admin_email" = var.admin_email
+ })
+ submit_startup = templatefile(
+ "${path.module}/startup-centos.sh",
+ {
+ "project" = var.project,
+ "cluster_name" = var.cluster_name,
+ "htserver_type" = "submit",
+ "osversion" = var.osversion,
+ "condorversion" = var.condorversion,
+ "zone" = var.zone,
+ "region" = var.region,
+ "multizone" = var.multizone,
+ "max_replicas" = var.max_replicas,
+ "autoscaler" = local.autoscaler,
+ "admin_email" = var.admin_email
+ })
+ manager_startup = templatefile(
+ "${path.module}/startup-centos.sh",
+ {
+ "project" = var.project,
+ "cluster_name" = var.cluster_name,
+ "htserver_type" = "manager",
+ "osversion" = var.osversion,
+ "zone" = var.zone,
+ "region" = var.region,
+ "multizone" = var.multizone,
+ "max_replicas" = var.max_replicas,
+ "condorversion" = var.condorversion,
+ "autoscaler" = "",
+ "admin_email" = var.admin_email
+ })
+}
+data "google_compute_image" "startimage" {
+ family = var.osimage
+ project = var.osproject
+}
+resource "google_compute_instance" "condor-manager" {
+ boot_disk {
+ auto_delete = "true"
+ device_name = "boot"
+
+ initialize_params {
+ image = data.google_compute_image.startimage.self_link
+ size = "200"
+ type = "pd-standard"
+ }
+
+ mode = "READ_WRITE"
+ }
+
+ can_ip_forward = "false"
+ deletion_protection = "false"
+ enable_display = "false"
+
+ machine_type = var.instance_type
+ metadata_startup_script = local.manager_startup
+ name = "${var.cluster_name}-manager"
+ network_interface {
+ access_config {
+ network_tier = "PREMIUM"
+ }
+
+ network = "default"
+ #network_ip = "10.128.0.2"
+ subnetwork = "default"
+ subnetwork_project = var.project
+ }
+
+ project = var.project
+
+ scheduling {
+ automatic_restart = "true"
+ on_host_maintenance = "MIGRATE"
+ preemptible = "false"
+ }
+
+ service_account {
+ email = var.service_account
+ scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+ }
+
+ shielded_instance_config {
+ enable_integrity_monitoring = "true"
+ enable_secure_boot = "false"
+ enable_vtpm = "true"
+ }
+
+ tags = ["${var.cluster_name}-manager"]
+ zone = var.zone
+}
+
+resource "google_compute_instance" "condor-submit" {
+ boot_disk {
+ auto_delete = "true"
+ device_name = "boot"
+
+ initialize_params {
+ image = data.google_compute_image.startimage.self_link
+ size = "200"
+ type = "pd-standard"
+ }
+
+ mode = "READ_WRITE"
+ }
+
+ can_ip_forward = "false"
+ deletion_protection = "false"
+ enable_display = "false"
+
+ labels = {
+ goog-dm = "mycondorcluster"
+ }
+
+ machine_type = var.instance_type
+ metadata_startup_script = local.submit_startup
+ name = "${var.cluster_name}-submit"
+
+ network_interface {
+ access_config {
+ network_tier = "PREMIUM"
+ }
+
+ network = "default"
+ #network_ip = "10.128.0.3"
+ subnetwork = "default"
+ subnetwork_project = var.project
+ }
+
+ project = var.project
+
+ scheduling {
+ automatic_restart = "true"
+ on_host_maintenance = "MIGRATE"
+ preemptible = "false"
+ }
+
+ service_account {
+ email = var.service_account
+ # email = "487217491196-compute@developer.gserviceaccount.com"
+ #scopes = ["https://www.googleapis.com/auth/monitoring.write", "https://www.googleapis.com/auth/compute", "https://www.googleapis.com/auth/servicecontrol", "https://www.googleapis.com/auth/devstorage.read_only", "https://www.googleapis.com/auth/logging.write", "https://www.googleapis.com/auth/service.management.readonly", "https://www.googleapis.com/auth/trace.append"]
+ scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+ }
+
+ shielded_instance_config {
+ enable_integrity_monitoring = "true"
+ enable_secure_boot = "false"
+ enable_vtpm = "true"
+ }
+
+ tags = ["${var.cluster_name}-submit"]
+ zone = var.zone
+}
+resource "google_compute_instance_template" "condor-compute" {
+ can_ip_forward = "false"
+
+ disk {
+ auto_delete = "true"
+ boot = "true"
+ device_name = "boot"
+ disk_size_gb = "200"
+ mode = "READ_WRITE"
+ source_image = data.google_compute_image.startimage.self_link
+ type = "PERSISTENT"
+ }
+
+ machine_type = var.compute_instance_type
+
+ metadata = {
+ startup-script = local.compute_startup
+ }
+
+ name = "${var.cluster_name}-compute"
+
+ network_interface {
+ access_config {
+ network_tier = "PREMIUM"
+ }
+
+ network = "default"
+ }
+
+ project = var.project
+ region = var.zone
+
+ scheduling {
+ automatic_restart = "false"
+ on_host_maintenance = "TERMINATE"
+ preemptible = var.use_preemptibles
+ }
+
+ service_account {
+ email = var.service_account
+ scopes = ["cloud-platform"]
+ }
+
+ tags = ["${var.cluster_name}-compute"]
+}
+resource "google_compute_instance_group_manager" "condor-compute-igm" {
+ count = var.multizone ? 0 : 1
+ base_instance_name = var.cluster_name
+ name = var.cluster_name
+
+ project = var.project
+ target_size = "0"
+
+ update_policy {
+ max_surge_fixed = 2
+ minimal_action = "REPLACE"
+ type = "OPPORTUNISTIC"
+ }
+
+ version {
+ instance_template = google_compute_instance_template.condor-compute.self_link
+ name = ""
+ }
+ timeouts {
+ create = "60m"
+ delete = "2h"
+ }
+ # Yup, didn't want to use this, but I was getting create and destroy errors.
+ depends_on = [
+ google_compute_instance_template.condor-compute
+ ]
+ zone = var.zone
+}
+
+resource "google_compute_region_instance_group_manager" "condor-compute-igm" {
+ count = var.multizone ? 1 : 0
+ base_instance_name = var.cluster_name
+ name = var.cluster_name
+
+ project = var.project
+ target_size = "0"
+
+ update_policy {
+ max_surge_fixed = var.numzones
+ minimal_action = "REPLACE"
+ type = "OPPORTUNISTIC"
+ }
+
+ version {
+ instance_template = google_compute_instance_template.condor-compute.self_link
+ name = ""
+ }
+ timeouts {
+ create = "60m"
+ delete = "2h"
+ }
+ # Yup, didn't want to use this, but I was getting create and destroy errors.
+ depends_on = [
+ google_compute_instance_template.condor-compute
+ ]
+ region = var.region
+}
+/*
+resource "google_compute_autoscaler" "condor-compute-as" {
+ name = "${var.cluster_name}-compute-as"
+ project = var.project
+ target = google_compute_instance_group_manager.condor-compute-igm.self_link
+ zone = var.zone
+
+ autoscaling_policy {
+ cooldown_period = 30
+ max_replicas = var.max_replicas
+ min_replicas = var.min_replicas
+
+ cpu_utilization {
+ target = 0.2
+ }
+
+ metric {
+ name = "custom.googleapis.com/q0"
+ target = var.metric_target_queue
+ type = "GAUGE"
+ }
+ metric {
+ name = "custom.googleapis.com/la0"
+ target = var.metric_target_loadavg
+ type = "GAUGE"
+ }
+
+ }
+
+ timeouts {
+ create = "60m"
+ delete = "2h"
+ }
+
+ depends_on = [
+ google_compute_instance_group_manager.condor-compute-igm
+ ]
+}
+*/
+
+output "startup_script" {
+ value = local.submit_startup
+}
\ No newline at end of file
diff --git a/docs/tutorials/multinode/terraform/htcondor/startup-centos.sh b/docs/tutorials/multinode/terraform/htcondor/startup-centos.sh
new file mode 100644
index 00000000..7ec9572d
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/htcondor/startup-centos.sh
@@ -0,0 +1,189 @@
+#!/bin/bash -x
+
+SERVER_TYPE="${htserver_type}"
+
+##############################################################
+## Install and configure HTCONDOR
+##############################################################
+
+if [ "${condorversion}" == "" ]; then
+ CONDOR_INSTALL_OPT="condor"
+else
+ CONDOR_INSTALL_OPT="condor-all-${condorversion}"
+ # email = "487217491196-compute@developer.gserviceaccount.com"
+fi
+if [ "${osversion}" == "6" ]; then
+ CONDOR_STARTUP_CMD="service condor start"
+else
+ CONDOR_STARTUP_CMD="systemctl start condor;systemctl enable condor"
+fi
+CONDOR_REPO_URL=https://research.cs.wisc.edu/htcondor/yum/repo.d/htcondor-stable-rhel${osversion}.repo
+
+sleep 2 #Give it some time to setup yum
+cd /tmp
+yum update -y
+yum install -y wget curl net-tools vim gcc python3 git
+wget https://research.cs.wisc.edu/htcondor/yum/RPM-GPG-KEY-HTCondor
+rpm --import RPM-GPG-KEY-HTCondor
+cd /etc/yum.repos.d && wget $CONDOR_REPO_URL
+yum install -y $CONDOR_INSTALL_OPT
+
+##############################################################
+# Install Docker on Compute Nodes
+##############################################################
+if [ "$SERVER_TYPE" == "compute" ]; then
+ yum install -y yum-utils
+ yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
+ yum install -y docker-ce docker-ce-cli containerd.io
+ systemctl start docker
+ systemctl enable docker
+ usermod -aG docker condor
+fi
+
+##############################################################
+# Configure Condor Daemons
+##############################################################
+cd /tmp
+cat < condor_config.local
+DISCARD_SESSION_KEYRING_ON_STARTUP=False
+CONDOR_ADMIN=${admin_email}
+CONDOR_HOST=${cluster_name}-manager
+EOF
+
+# Case for compute
+if [ "$SERVER_TYPE" == "compute" ]; then
+cat <> condor_config.local
+# Standard Stuff
+DAEMON_LIST = MASTER, STARTD
+ALLOW_WRITE = \$(ALLOW_WRITE), \$(CONDOR_HOST)
+# Run Dynamics Slots
+NUM_SLOTS = 1
+NUM_SLOTS_TYPE_1 = 1
+SLOT_TYPE_1 = 100%
+SLOT_TYPE_1_PARTITIONABLE = TRUE
+# Allowing Run as Owner
+STARTER_ALLOW_RUNAS_OWNER = TRUE
+SUBMIT_ATTRS = RunAsOwner
+RunAsOwner = True
+UID_DOMAIN = c.${project}.internal
+TRUST_UID_DOMAIN = True
+HasDocker = True
+EOF1
+fi
+
+# Case for manager
+if [ "$SERVER_TYPE" == "manager" ]; then
+cat <> condor_config.local
+DAEMON_LIST = MASTER, COLLECTOR, NEGOTIATOR
+ALLOW_WRITE = *
+EOF2
+fi
+
+# Case for submit
+if [ "$SERVER_TYPE" == "submit" ]; then
+cat <> condor_config.local
+DAEMON_LIST = MASTER, SCHEDD
+ALLOW_WRITE = \$(ALLOW_WRITE), \$(CONDOR_HOST)
+# Allowing Run as Owner
+STARTER_ALLOW_RUNAS_OWNER = TRUE
+SUBMIT_ATTRS = RunAsOwner
+RunAsOwner = True
+UID_DOMAIN = c.${project}.internal
+TRUST_UID_DOMAIN = True
+EOF3
+fi
+
+
+mkdir -p /etc/condor/config.d
+mv condor_config.local /etc/condor/config.d
+eval $CONDOR_STARTUP_CMD
+
+##############################################################
+# Install and configure logging agent for StackDriver
+##############################################################
+curl -sSO https://dl.google.com/cloudagents/add-logging-agent-repo.sh
+bash add-logging-agent-repo.sh --also-install
+
+# Install Custom Metric Plugin:
+google-fluentd-gem install fluent-plugin-stackdriver-monitoring
+
+# Create Fluentd Config
+
+cat < condor.conf
+
+
+
+
+ @type stackdriver_monitoring
+ project ${project}
+
+ key la0
+ type custom.googleapis.com/la0
+ metric_kind GAUGE
+ value_type DOUBLE
+
+
+
+ @type stackdriver_monitoring
+ project ${project}
+
+ key q0
+ type custom.googleapis.com/q0
+ metric_kind GAUGE
+ value_type INT64
+
+
+EOF
+mkdir -p /etc/google-fluentd/config.d/
+mv condor.conf /etc/google-fluentd/config.d/
+
+if [ "$SERVER_TYPE" == "submit" ]; then
+mkdir -p /var/log/condor/jobs
+touch /var/log/condor/jobs/stats.log
+chmod 666 /var/log/condor/jobs/stats.log
+fi
+
+service google-fluentd restart
+
+# Add Python Libraries and Autoscaler
+if [ "$SERVER_TYPE" == "submit" ]; then
+ python3 -m pip install --upgrade oauth2client
+ python3 -m pip install --upgrade google-api-python-client
+ python3 -m pip install --upgrade absl-py
+
+cat < /opt/autoscaler.py
+${autoscaler}
+EOFZ
+
+# Create cron entry for autoscaler. Log to /var/log/messages
+
+echo "* * * * * python3 /opt/autoscaler.py --p ${project} --z ${zone} --r ${region} %{ if multizone }--mz %{ endif }--g ${cluster_name} --c ${max_replicas} | logger " |crontab -
+
+fi
+
+# Now we can let everyone know that the setup is complete.
+
+wall "******* HTCondor system configuration complete ********"
diff --git a/docs/tutorials/multinode/terraform/init.sh b/docs/tutorials/multinode/terraform/init.sh
new file mode 100644
index 00000000..775ef679
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/init.sh
@@ -0,0 +1,31 @@
+# ---- Edit below -----#
+
+export TF_VAR_project=[USER_PROJECT]
+export TF_VAR_zone=us-east4-c
+export TF_VAR_region=us-east4
+
+export TF_VAR_multizone=false
+# For regional/multizone, set this to the number of regions in the zone.
+export TF_VAR_numzones=4
+
+# ---- Do not edit below -----#
+
+export TF_VAR_project_id=${TF_VAR_project}
+export TF_VAR_service_account="htcondor@"${TF_VAR_project}".iam.gserviceaccount.com"
+
+gcloud config set project $TF_VAR_project
+gcloud services enable compute.googleapis.com
+gcloud services enable monitoring.googleapis.com
+gcloud config set compute/zone $TF_VAR_zone
+gcloud config set compute/region $TF_VAR_region
+
+gcloud config list
+
+gcloud iam service-accounts create htcondor --display-name="Run HTCondor"
+
+# Add roles
+gcloud projects add-iam-policy-binding ${TF_VAR_project} --member serviceAccount:${TF_VAR_service_account} --role roles/compute.admin
+gcloud projects add-iam-policy-binding ${TF_VAR_project} --member serviceAccount:${TF_VAR_service_account} --role roles/iam.serviceAccountUser
+gcloud projects add-iam-policy-binding ${TF_VAR_project} --member serviceAccount:${TF_VAR_service_account} --role roles/monitoring.admin
+gcloud projects add-iam-policy-binding ${TF_VAR_project} --member serviceAccount:${TF_VAR_service_account} --role roles/logging.admin
+gcloud projects add-iam-policy-binding ${TF_VAR_project} --member serviceAccount:${TF_VAR_service_account} --role roles/autoscaling.metricsWriter
diff --git a/docs/tutorials/multinode/terraform/init.tf b/docs/tutorials/multinode/terraform/init.tf
new file mode 100644
index 00000000..8660f9fa
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/init.tf
@@ -0,0 +1,2 @@
+provider "google" {
+}
diff --git a/docs/tutorials/multinode/terraform/main.tf b/docs/tutorials/multinode/terraform/main.tf
new file mode 100644
index 00000000..9e92cc74
--- /dev/null
+++ b/docs/tutorials/multinode/terraform/main.tf
@@ -0,0 +1,40 @@
+variable "project" {
+ type=string
+}
+variable "zone" {
+ type=string
+}
+variable "region" {
+ type=string
+}
+variable "multizone" {
+ type=bool
+}
+variable "numzones" {
+ type=string
+}
+
+variable "cluster_name" {
+ type = string
+ default = "c"
+ description = "Name used to prefix resources in cluster."
+
+}
+
+module "htcondor" {
+ source = "./htcondor/"
+ cluster_name = var.cluster_name
+ project = var.project
+ zone = var.zone
+ region = var.region
+ multizone = var.multizone
+ numzones = var.numzones
+ osversion = "7"
+ max_replicas=20
+ min_replicas=0
+ compute_instance_type = "custom-2-11264"
+ service_account="htcondor@${var.project}.iam.gserviceaccount.com"
+ use_preemptibles=false
+ osproject ="centos-cloud"
+ osimage ="centos-7"
+}
\ No newline at end of file
diff --git a/docs/tutorials/noisy_qsimcirq.ipynb b/docs/tutorials/noisy_qsimcirq.ipynb
index 6f366593..af2e5c89 100644
--- a/docs/tutorials/noisy_qsimcirq.ipynb
+++ b/docs/tutorials/noisy_qsimcirq.ipynb
@@ -98,7 +98,7 @@
"try:\n",
" import qsimcirq\n",
"except ImportError:\n",
- " !pip install qsimcirq==0.9.5 --quiet\n",
+ " !pip install qsimcirq --quiet\n",
" import qsimcirq"
]
},
@@ -284,4 +284,4 @@
"metadata": {}
}
]
-}
\ No newline at end of file
+}
diff --git a/docs/tutorials/qsimcirq_gcp.md b/docs/tutorials/qsimcirq_gcp.md
index 36ac704b..3945564a 100644
--- a/docs/tutorials/qsimcirq_gcp.md
+++ b/docs/tutorials/qsimcirq_gcp.md
@@ -53,7 +53,7 @@ Then click on *Create* for a new VM instance:
![alt_text](../images/qsimcirq_gcp/image7.png )
-### Build a Container Optimized VM
+### Build a Container Optimized VM with container deployed
To create the VM use the steps in sequence below:
@@ -72,14 +72,17 @@ To create the VM use the steps in sequence below:
* Choose the [Machine Type](https://cloud.google.com/blog/products/compute/choose-the-right-google-compute-engine-machine-type-for-you): n2-standard-16
* 16 CPUs
* 64GB memory
-* Choose the Boot Disk image:[ Container-Optimized OS](https://cloud.google.com/container-optimized-os/docs/concepts/features-and-benefits)
* Leave the remaining as defaults.
![alt_text](../images/qsimcirq_gcp/image10.png )
+> Select `Deploy a container image to this VM instance`.
-Finally, enable HTTP access and click *Create*.
-
-![alt_text](../images/qsimcirq_gcp/image8.png )
+For the container image enter:
+```
+gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest
+```
+![alt_text](../images/qsimcirq_gcp/container.png )
+> This may take a few minutes to complete, even after the VM is created, the container will take some time to complete.
## Preparing your local system to connect to Colab
@@ -145,32 +148,7 @@ You should now see the command line prompt from your VM:
wellhello@qsim-1 ~ $
```
-### Run the Jupyter / qsim container on your VM
-
-At the command prompt you can now start a Docker container with all the required
-code to run simulations. Start the container:
-
-```
-$ docker run -v `pwd`:/homedir -p 8888:8888 gcr.io/quantum-builds/github.com/quantumlib/jupyter_qsim:latest
-```
-
-You should see several lines of output ending with lines like below. (If you get
-an error about `permission denied` you may need to run docker with `sudo` as
-[described here](https://docs.docker.com/engine/reference/run/#general-form)).
-
-
-```
-To access the notebook, open this file in a browser:
- file:///root/.local/share/jupyter/runtime/nbserver-1-open.html
-Or copy and paste one of these URLs:
- http://e1f7a7cca9fa:8888/?token=aa16e1b6d3f51c58928037d34cc6854dac47347dd4c0eae5
- or http://127.0.0.1:8888/?token=aa16e1b6d3f51c58928037d34cc6854dac47347dd4c0eae5
-```
-
-Copy the last URL in the output. Edit the URL to replace `127.0.0.1` with
-`localhost`. Save this URL for the next step. This URL points to your local
-runtime, running as a Docker container on your VM.
-
+The container port is now forwarded to your local machine.
## Connect Colab to your local runtime
@@ -185,9 +163,9 @@ get the UI:
Select *Connect to local runtime*. You will see the UI:
-
+
-Pass the edited URL from the previous section, then click *Connect*:
+Type in the URL `http://localhost:8888/` , then click *Connect*:
@@ -213,7 +191,7 @@ In the previous step, you copied a URL like below. It is easy to just copy that
URL and paste it directly into a browser running on your local machine.
```
-http://127.0.0.1:8888/?token=7191178ae9aa4ebe1698b07bb67dea1d289cfd0e0b960373
+http://127.0.0.1:8888/
```
In the browser you should now see the Jupyter UI:
@@ -230,11 +208,6 @@ You can now run these cells as you would in any notebook.
![alt_text](../images/qsimcirq_gcp/image1.png)
-If you choose to modify the notebook, you can save it on the *qsim-1* VM from *File* -> *Save As*, and saving to `/homedir/mynotebook.ipynb`. This will save in your home directory on your VM. If you intend to destroy the VM after this tutorial, either download the notebooks from the VM or save directly from your browser.
-
-![alt_text](../images/qsimcirq_gcp/image4.png)
-
-
## Run interactively
To run interactively within the container, you can open a second shell window to
@@ -312,24 +285,6 @@ output vector: (0.5+0.5j)|0⟩ + (0.5-0.5j)|1⟩
You have successfully simulated a quantum circuit on Google Cloud Platform using
a Docker container.
-### Running your own script
-
-If you want to run a Python script, you can locate a file in the home directory
-on your VM, then run something like the following in the container shell:
-
-```
-$ python3 /homedir/myscript.py
-```
-
-### Exit the container
-
-Exit the container by typing ctrl-d twice. You will see the output like:
-
-```
-[root@79804d33f250 /]# exit
-```
-
-
## Clean up
To avoid incurring charges to your Google Cloud Platform account for the
diff --git a/docs/usage.md b/docs/usage.md
index 7bc5e525..33b7f317 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -13,7 +13,7 @@ Sample circuits are provided in
## qsim_base usage
```
-./qsim_base.x -c circuit_file -d maxtime -t num_threads -f max_fused_size -v verbosity
+./qsim_base.x -c circuit_file -d maxtime -t num_threads -f max_fused_size -v verbosity -z
```
| Flag | Description |
@@ -22,11 +22,23 @@ Sample circuits are provided in
|`-d maxtime` | maximum time |
|`-t num_threads` | number of threads to use|
|`-f max_fused_size` | maximum fused gate size|
-|`-v verbosity` | verbosity level (0,1,>1)|
+|`-v verbosity` | verbosity level (0,1,2,3,4,5)|
+|`-z` | set flush-to-zero and denormals-are-zeros MXCSR control flags|
qsim_base computes all the amplitudes and just prints the first eight of them
(or a smaller number for 1- or 2-qubit circuits).
+Verbosity levels are described in the following table.
+
+| Verbosity level | Description |
+|-----------------|-------------|
+| 0 | no additional information|
+| 1 | add total simulation runtime|
+| 2 | add initialization runtime and fuser runtime|
+| 3 | add basic fuser statistics|
+| 4 | add simulation runtime for each fused gate|
+| 5 | additional fuser information (qubit indices for each fused gate)|
+
Example:
```
./qsim_base.x -c ../circuits/circuit_q24 -d 16 -t 8 -v 1
@@ -35,7 +47,7 @@ Example:
## qsim_von_neumann usage
```
-./qsim_von_neumann.x -c circuit_file -d maxtime -t num_threads -f max_fused_size -v verbosity
+./qsim_von_neumann.x -c circuit_file -d maxtime -t num_threads -f max_fused_size -v verbosity -z
```
@@ -45,7 +57,8 @@ Example:
|`-d maxtime` | maximum time |
|`-t num_threads` | number of threads to use|
|`-f max_fused_size` | maximum fused gate size|
-|`-v verbosity` | verbosity level (0,1,>1)|
+|`-v verbosity` | verbosity level (0,1,2,3,4,5)|
+|`-z` | set flush-to-zero and denormals-are-zeros MXCSR control flags|
qsim_von_neumann computes all the amplitudes and calculates the von Neumann
entropy. Note that this can be quite slow for large circuits and small thread
@@ -64,18 +77,19 @@ Example:
-i input_files \
-o output_files \
-f max_fused_size \
- -t num_threads -v verbosity
+ -t num_threads -v verbosity -z
```
| Flag | Description |
|-------|------------|
|`-c circuit_file` | circuit file to run|
-|`-d times_to_save_results` | comma-separated list of circuit times to save results at|
+|`-d times_to_save_results` | comma-separated list of circuit times to save results at|
|`-i input_files` | comma-separated list of bitstring input files|
|`-o output_files` | comma-separated list of amplitude output files|
|`-t num_threads` | number of threads to use|
|`-f max_fused_size` | maximum fused gate size|
-|`-v verbosity` | verbosity level (0,1,>1)|
+|`-v verbosity` | verbosity level (0,1,2,3,4,5)|
+|`-z` | set flush-to-zero and denormals-are-zeros MXCSR control flags|
qsim_amplitudes reads input files of bitstrings, computes the corresponding
amplitudes at specified times and writes them to output files.
@@ -88,6 +102,39 @@ Example:
./qsim_amplitudes.x -c ../circuits/circuit_q24 -t 4 -d 16,24 -i ../circuits/bitstrings_q24_s1,../circuits/bitstrings_q24_s2 -o ampl_q24_s1,ampl_q24_s2 -v 1
```
+## qsim_qtrajectory_cuda usage
+
+```
+./qsim_qtrajectory_cuda.x -c circuit_file \
+ -d times_to_calculate_observables \
+ -a amplitude_damping_const \
+ -p phase_damping_const \
+ -t traj0 -n num_trajectories \
+ -f max_fused_size \
+ -v verbosity
+```
+
+| Flag | Description |
+|-------|------------|
+|`-c circuit_file` | circuit file to run|
+|`-d times_to_calculate_observables` | comma-separated list of circuit times to calculate observables at|
+|`-a amplitude_damping_const` | amplitude damping constant |
+|`-p phase_damping_const` | phase damping constant |
+|`-t traj0` | starting trajectory |
+|`-n num_trajectories ` | number of trajectories to run starting with `traj0` |
+|`-f max_fused_size` | maximum fused gate size|
+|`-v verbosity` | verbosity level (0,1,2,3,4,5)|
+
+qsim_qtrajectory_cuda runs on GPUs. qsim_qtrajectory_cuda performs quantum
+trajactory simulations with amplitude damping and phase damping noise channels.
+qsim_qtrajectory_cuda calculates observables (operator X at each qubit) at
+specified times.
+
+Example:
+```
+./qsim_qtrajectory_cuda.x -c ../circuits/circuit_q24 -d 8,16,32 -a 0.005 -p 0.005 -t 0 -n 100 -f 4 -v 0
+```
+
## qsimh_base usage
```
@@ -97,20 +144,20 @@ Example:
-w prefix \
-p num_prefix_gates \
-r num_root_gates \
- -t num_threads -v verbosity
+ -t num_threads -v verbosity -z
```
| Flag | Description |
|-------|------------|
|`-c circuit_file` | circuit file to run|
|`-d maxtime` | maximum time |
-|`-k part1_qubits` | comma-separated list of qubit indices for part 1 |
+|`-k part1_qubits` | comma-separated list of qubit indices for part 1|
|`-w prefix`| prefix value |
|`-p num_prefix_gates` | number of prefix gates|
|`-r num_root_gates` | number of root gates|
|`-t num_threads` | number of threads to use|
-|`-v verbosity` | verbosity level (0,>0)|
-
+|`-v verbosity` | verbosity level (0,1,4,5)|
+|`-z` | set flush-to-zero and denormals-are-zeros MXCSR control flags|
qsimh_base just computes and just prints the first eight amplitudes. The hybrid
Schrödinger-Feynman method is used. The lattice is split into two parts.
@@ -176,21 +223,22 @@ maximum "time".
-p num_prefix_gates \
-r num_root_gates \
-i input_file -o output_file \
- -t num_threads -v verbosity
+ -t num_threads -v verbosity -z
```
| Flag | Description |
|-------|------------|
|`-c circuit_file` | circuit file to run|
|`-d maxtime` | maximum time |
-|`-k part1_qubits` | comma-separated list of qubit indices for part 1 |
+|`-k part1_qubits` | comma-separated list of qubit indices for part 1|
|`-w prefix`| prefix value |
|`-p num_prefix_gates` | number of prefix gates|
|`-r num_root_gates` | number of root gates|
|`-i input_file` | bitstring input file|
|`-o output_file` | amplitude output file|
|`-t num_threads` | number of threads to use|
-|`-v verbosity` | verbosity level (0,>0)|
+|`-v verbosity` | verbosity level (0,1,4,5)|
+|`-z` | set flush-to-zero and denormals-are-zeros MXCSR control flags|
qsimh_amplitudes reads the input file of bitstrings, computes the corresponding
amplitudes and writes them to the output file. The hybrid Schrödinger-Feynman
diff --git a/jupyter/Dockerfile b/jupyter/Dockerfile
index 29043611..35610cd9 100644
--- a/jupyter/Dockerfile
+++ b/jupyter/Dockerfile
@@ -1,8 +1,15 @@
# Base OS
FROM centos:8
USER root
-# Install baseline
+# Centos 8 has reach end of life: https://www.centos.org/centos-linux-eol/
+# Configuration must be loaded from the vault.
+RUN pushd /etc/yum.repos.d/ && \
+ sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
+ sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && \
+ popd
+
+# Install baseline
RUN yum -y update && \
yum install -y epel-release && \
yum group install -y "Development Tools" && \
@@ -22,6 +29,5 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws
CMD ["jupyter-notebook", "--port=8888", "--no-browser",\
"--ip=0.0.0.0", "--allow-root", \
"--NotebookApp.allow_origin='*'", \
- "--NotebookApp.port_retries=0"]
-
- #"--NotebookApp.allow_origin='https://colab.research.google.com'", \
\ No newline at end of file
+ "--NotebookApp.port_retries=0", \
+ "--NotebookApp.token=''"]
diff --git a/lib/BUILD b/lib/BUILD
index 3316f531..9be39dea 100644
--- a/lib/BUILD
+++ b/lib/BUILD
@@ -1,8 +1,14 @@
package(default_visibility = ["//visibility:public"])
+# Libraries of the following form:
+# # cuda_library
+# cc_library(...)
+# are converted to cuda_library rules when imported to the Google codebase.
+# Do not modify this tag.
+
##### Aggregate libraries #####
-# Full qsim library
+# Full qsim library, minus CUDA
cc_library(
name = "qsim_lib",
hdrs = [
@@ -34,6 +40,66 @@ cc_library(
"run_qsimh.h",
"seqfor.h",
"simmux.h",
+ "simulator.h",
+ "simulator_avx.h",
+ "simulator_avx512.h",
+ "simulator_basic.h",
+ "simulator_sse.h",
+ "statespace_avx.h",
+ "statespace_avx512.h",
+ "statespace_basic.h",
+ "statespace_sse.h",
+ "statespace.h",
+ "umux.h",
+ "unitaryspace.h",
+ "unitaryspace_avx.h",
+ "unitaryspace_avx512.h",
+ "unitaryspace_basic.h",
+ "unitaryspace_sse.h",
+ "unitary_calculator_avx.h",
+ "unitary_calculator_avx512.h",
+ "unitary_calculator_basic.h",
+ "unitary_calculator_sse.h",
+ "util.h",
+ "util_cpu.h",
+ "vectorspace.h",
+ ],
+)
+
+# Full qsim library, including CUDA
+# cuda_library
+cc_library(
+ name = "qsim_cuda_lib",
+ hdrs = [
+ "bits.h",
+ "bitstring.h",
+ "channel.h",
+ "channels_cirq.h",
+ "circuit_noisy.h",
+ "circuit_qsim_parser.h",
+ "circuit.h",
+ "expect.h",
+ "formux.h",
+ "fuser.h",
+ "fuser_basic.h",
+ "fuser_mqubit.h",
+ "gate.h",
+ "gate_appl.h",
+ "gates_cirq.h",
+ "gates_qsim.h",
+ "hybrid.h",
+ "io_file.h",
+ "io.h",
+ "matrix.h",
+ "mps_simulator.h",
+ "mps_statespace.h",
+ "parfor.h",
+ "qtrajectory.h",
+ "run_qsim.h",
+ "run_qsimh.h",
+ "seqfor.h",
+ "simmux.h",
+ "simulator.h",
"simulator_avx.h",
"simulator_avx512.h",
"simulator_basic.h",
@@ -58,6 +124,7 @@ cc_library(
"unitary_calculator_basic.h",
"unitary_calculator_sse.h",
"util.h",
+ "util_cpu.h",
"util_cuda.h",
"vectorspace.h",
"vectorspace_cuda.h",
@@ -86,6 +153,7 @@ cc_library(
"run_qsim.h",
"seqfor.h",
"simmux.h",
+ "simulator.h",
"simulator_avx.h",
"simulator_avx512.h",
"simulator_basic.h",
@@ -104,6 +172,7 @@ cc_library(
"unitary_calculator_basic.h",
"unitary_calculator_sse.h",
"util.h",
+ "util_cpu.h",
"vectorspace.h",
],
)
@@ -131,6 +200,7 @@ cc_library(
"run_qsimh.h",
"seqfor.h",
"simmux.h",
+ "simulator.h",
"simulator_avx.h",
"simulator_avx512.h",
"simulator_basic.h",
@@ -141,6 +211,7 @@ cc_library(
"statespace_basic.h",
"statespace_sse.h",
"util.h",
+ "util_cpu.h",
"vectorspace.h",
],
)
@@ -171,6 +242,12 @@ cc_library(
hdrs = ["util.h"],
)
+cc_library(
+ name = "util_cpu",
+ hdrs = ["util_cpu.h"],
+)
+
+# cuda_library
cc_library(
name = "util_cuda",
hdrs = ["util_cuda.h"],
@@ -331,6 +408,7 @@ cc_library(
hdrs = ["vectorspace.h"],
)
+# cuda_library
cc_library(
name = "vectorspace_cuda",
hdrs = ["vectorspace_cuda.h"],
@@ -384,6 +462,7 @@ cc_library(
],
)
+# cuda_library
cc_library(
name = "statespace_cuda",
hdrs = [
@@ -399,11 +478,17 @@ cc_library(
### Simulator libraries ###
+cc_library(
+ name = "simulator_base",
+ hdrs = ["simulator.h"],
+ deps = [":bits"],
+)
+
cc_library(
name = "simulator_avx",
hdrs = ["simulator_avx.h"],
deps = [
- ":bits",
+ ":simulator_base",
":statespace_avx",
],
)
@@ -412,7 +497,7 @@ cc_library(
name = "simulator_avx512",
hdrs = ["simulator_avx512.h"],
deps = [
- ":bits",
+ ":simulator_base",
":statespace_avx512",
],
)
@@ -421,7 +506,7 @@ cc_library(
name = "simulator_basic",
hdrs = ["simulator_basic.h"],
deps = [
- ":bits",
+ ":simulator_base",
":statespace_basic",
],
)
@@ -430,11 +515,12 @@ cc_library(
name = "simulator_sse",
hdrs = ["simulator_sse.h"],
deps = [
- ":bits",
+ ":simulator_base",
":statespace_sse",
],
)
+# cuda_library
cc_library(
name = "simulator_cuda",
hdrs = [
@@ -475,7 +561,10 @@ cc_library(
cc_library(
name = "channel",
hdrs = ["channel.h"],
- deps = [":gate"],
+ deps = [
+ ":gate",
+ ":matrix",
+ ],
)
cc_library(
@@ -557,7 +646,7 @@ cc_library(
name = "unitary_calculator_avx",
hdrs = ["unitary_calculator_avx.h"],
deps = [
- ":bits",
+ ":simulator_base",
":unitaryspace_avx",
],
)
@@ -566,7 +655,7 @@ cc_library(
name = "unitary_calculator_avx512",
hdrs = ["unitary_calculator_avx512.h"],
deps = [
- ":bits",
+ ":simulator_base",
":unitaryspace_avx512",
],
)
@@ -575,7 +664,7 @@ cc_library(
name = "unitary_calculator_basic",
hdrs = ["unitary_calculator_basic.h"],
deps = [
- ":bits",
+ ":simulator_base",
":unitaryspace_basic",
],
)
@@ -584,7 +673,7 @@ cc_library(
name = "unitary_calculator_sse",
hdrs = ["unitary_calculator_sse.h"],
deps = [
- ":bits",
+ ":simulator_base",
":unitaryspace_sse",
],
)
diff --git a/lib/channel.h b/lib/channel.h
index 5f2a187b..372a174c 100644
--- a/lib/channel.h
+++ b/lib/channel.h
@@ -15,7 +15,11 @@
#ifndef CHANNEL_H_
#define CHANNEL_H_
+#include
+#include
+
#include "gate.h"
+#include "matrix.h"
namespace qsim {
@@ -24,6 +28,8 @@ namespace qsim {
*/
template
struct KrausOperator {
+ using fp_type = typename Gate::fp_type;
+
enum Kind {
kNormal = 0,
kMeasurement = gate::kMeasurement,
@@ -49,6 +55,68 @@ struct KrausOperator {
* one operation.
*/
std::vector ops;
+
+ /**
+ * Product of K^\dagger and K. This can be empty if unitary = true.
+ */
+ Matrix kd_k;
+
+ /**
+ * Qubits kd_k acts on. This can be empty if unitary = true.
+ */
+ std::vector qubits;
+
+ /**
+ * Calculates the product of "K^\dagger K". Sets qubits "K^\dagger K" acts on.
+ */
+ void CalculateKdKMatrix() {
+ if (ops.size() == 1) {
+ kd_k = ops[0].matrix;
+ MatrixDaggerMultiply(ops[0].qubits.size(), ops[0].matrix, kd_k);
+ qubits = ops[0].qubits;
+ } else if (ops.size() > 1) {
+ std::set qubit_map;
+
+ for (const auto& op : ops) {
+ for (unsigned q : op.qubits) {
+ qubit_map.insert(q);
+ }
+ }
+
+ unsigned num_qubits = qubit_map.size();
+
+ qubits.resize(0);
+ qubits.reserve(num_qubits);
+
+ for (auto it = qubit_map.begin(); it != qubit_map.end(); ++it) {
+ qubits.push_back(*it);
+ }
+
+ MatrixIdentity(unsigned{1} << num_qubits, kd_k);
+
+ for (const auto& op : ops) {
+ if (op.qubits.size() == num_qubits) {
+ MatrixMultiply(num_qubits, op.matrix, kd_k);
+ } else {
+ unsigned mask = 0;
+
+ for (auto q : op.qubits) {
+ for (unsigned i = 0; i < num_qubits; ++i) {
+ if (q == qubits[i]) {
+ mask |= unsigned{1} << i;
+ break;
+ }
+ }
+ }
+
+ MatrixMultiply(mask, op.qubits.size(), op.matrix, num_qubits, kd_k);
+ }
+ }
+
+ auto m = kd_k;
+ MatrixDaggerMultiply(num_qubits, m, kd_k);
+ }
+ }
};
/**
diff --git a/lib/channels_cirq.h b/lib/channels_cirq.h
index a8fd87ce..69f1df9d 100644
--- a/lib/channels_cirq.h
+++ b/lib/channels_cirq.h
@@ -237,11 +237,22 @@ struct GeneralizedAmplitudeDampingChannel {
using M = Cirq::MatrixGate1;
auto normal = KrausOperator>::kNormal;
-
- return {{normal, 0, p1, {M::Create(time, q, {t1, 0, 0, 0, 0, 0, r1, 0})}},
- {normal, 0, p2, {M::Create(time, q, {r2, 0, 0, 0, 0, 0, t2, 0})}},
- {normal, 0, p3, {M::Create(time, q, { 0, 0, s1, 0, 0, 0, 0, 0})}},
- {normal, 0, p3, {M::Create(time, q, { 0, 0, 0, 0, s2, 0, 0, 0})}},
+ return {{normal, 0, p1,
+ {M::Create(time, q, {t1, 0, 0, 0, 0, 0, r1, 0})},
+ {t1 * t1, 0, 0, 0, 0, 0, r1 * r1, 0}, {q},
+ },
+ {normal, 0, p2,
+ {M::Create(time, q, {r2, 0, 0, 0, 0, 0, t2, 0})},
+ {r2 * r2, 0, 0, 0, 0, 0, t2 * t2, 0}, {q},
+ },
+ {normal, 0, p3,
+ {M::Create(time, q, {0, 0, s1, 0, 0, 0, 0, 0})},
+ {0, 0, 0, 0, 0, 0, s1 * s1, 0}, {q},
+ },
+ {normal, 0, p3,
+ {M::Create(time, q, {0, 0, 0, 0, s2, 0, 0, 0})},
+ {s2 * s2, 0, 0, 0, 0, 0, 0, 0}, {q},
+ },
};
}
@@ -281,8 +292,14 @@ struct AmplitudeDampingChannel {
using M = Cirq::MatrixGate1;
auto normal = KrausOperator>::kNormal;
- return {{normal, 0, p1, {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})}},
- {normal, 0, p2, {M::Create(time, q, {0, 0, s, 0, 0, 0, 0, 0})}},
+ return {{normal, 0, p1,
+ {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})},
+ {1, 0, 0, 0, 0, 0, r * r, 0}, {q},
+ },
+ {normal, 0, p2,
+ {M::Create(time, q, {0, 0, s, 0, 0, 0, 0, 0})},
+ {0, 0, 0, 0, 0, 0, s * s, 0}, {q},
+ },
};
}
@@ -320,8 +337,14 @@ struct PhaseDampingChannel {
using M = Cirq::MatrixGate1;
auto normal = KrausOperator>::kNormal;
- return {{normal, 0, p1, {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})}},
- {normal, 0, p2, {M::Create(time, q, {0, 0, 0, 0, 0, 0, s, 0})}},
+ return {{normal, 0, p1,
+ {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})},
+ {1, 0, 0, 0, 0, 0, r * r, 0}, {q},
+ },
+ {normal, 0, p2,
+ {M::Create(time, q, {0, 0, 0, 0, 0, 0, s, 0})},
+ {0, 0, 0, 0, 0, 0, s * s, 0}, {q},
+ },
};
}
@@ -351,8 +374,14 @@ struct ResetChannel {
using M = Cirq::MatrixGate1;
auto normal = KrausOperator>::kNormal;
- return {{normal, 0, 0, {M::Create(time, q, {1, 0, 0, 0, 0, 0, 0, 0})}},
- {normal, 0, 0, {M::Create(time, q, {0, 0, 1, 0, 0, 0, 0, 0})}},
+ return {{normal, 0, 0,
+ {M::Create(time, q, {1, 0, 0, 0, 0, 0, 0, 0})},
+ {1, 0, 0, 0, 0, 0, 0, 0}, {q},
+ },
+ {normal, 0, 0,
+ {M::Create(time, q, {0, 0, 1, 0, 0, 0, 0, 0})},
+ {0, 0, 0, 0, 0, 0, 1, 0}, {q},
+ },
};
}
};
diff --git a/lib/channels_qsim.h b/lib/channels_qsim.h
new file mode 100644
index 00000000..5c07bccf
--- /dev/null
+++ b/lib/channels_qsim.h
@@ -0,0 +1,117 @@
+// Copyright 2019 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CHANNELS_QSIM_H_
+#define CHANNELS_QSIM_H_
+
+#include
+#include
+#include
+
+#include "channel.h"
+#include "gates_qsim.h"
+
+namespace qsim {
+
+/**
+ * Amplitude damping channel factory.
+ */
+template
+struct AmplitudeDampingChannel {
+ AmplitudeDampingChannel(double gamma) : gamma(gamma) {}
+
+ static Channel> Create(
+ unsigned time, unsigned q, double gamma) {
+ double p1 = 1 - gamma;
+ double p2 = 0;
+
+ fp_type r = std::sqrt(p1);
+ fp_type s = std::sqrt(gamma);
+
+ using M = GateMatrix1;
+ auto normal = KrausOperator>::kNormal;
+
+ return {{normal, 0, p1,
+ {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})},
+ {1, 0, 0, 0, 0, 0, r * r, 0}, {q},
+ },
+ {normal, 0, p2,
+ {M::Create(time, q, {0, 0, s, 0, 0, 0, 0, 0})},
+ {0, 0, 0, 0, 0, 0, s * s, 0}, {q},
+ },
+ };
+ }
+
+ Channel> Create(unsigned time, unsigned q) const {
+ return Create(time, q, gamma);
+ }
+
+ double gamma = 0;
+};
+
+/**
+ * Returns an amplitude damping channel factory object.
+ */
+template
+inline AmplitudeDampingChannel amplitude_damp(double gamma) {
+ return AmplitudeDampingChannel(gamma);
+}
+
+/**
+ * Phase damping channel factory.
+ */
+template
+struct PhaseDampingChannel {
+ PhaseDampingChannel(double gamma) : gamma(gamma) {}
+
+ static Channel> Create(
+ unsigned time, unsigned q, double gamma) {
+ double p1 = 1 - gamma;
+ double p2 = 0;
+
+ fp_type r = std::sqrt(p1);
+ fp_type s = std::sqrt(gamma);
+
+ using M = GateMatrix1;
+ auto normal = KrausOperator>::kNormal;
+
+ return {{normal, 0, p1,
+ {M::Create(time, q, {1, 0, 0, 0, 0, 0, r, 0})},
+ {1, 0, 0, 0, 0, 0, r * r, 0}, {q},
+ },
+ {normal, 0, p2,
+ {M::Create(time, q, {0, 0, 0, 0, 0, 0, s, 0})},
+ {0, 0, 0, 0, 0, 0, s * s, 0}, {q},
+ },
+ };
+ }
+
+ Channel> Create(unsigned time, unsigned q) const {
+ return Create(time, q, gamma);
+ }
+
+ double gamma = 0;
+};
+
+/**
+ * Returns a phase damping channel factory object.
+ */
+template
+inline PhaseDampingChannel phase_damp(double gamma) {
+ return PhaseDampingChannel(gamma);
+}
+
+} // namespace qsim
+
+#endif // CHANNELS_QSIM_H_
diff --git a/lib/expect.h b/lib/expect.h
index 2943c713..38fabfe4 100644
--- a/lib/expect.h
+++ b/lib/expect.h
@@ -42,18 +42,27 @@ struct OpString {
* @param ket Temporary state vector.
* @return The computed expectation value.
*/
-template
+template
std::complex ExpectationValue(
const typename Fuser::Parameter& param,
const std::vector>& strings,
- const typename Simulator::StateSpace& ss, const Simulator& simulator,
- const typename Simulator::State& state, typename Simulator::State& ket) {
+ const typename Simulator::StateSpace& state_space,
+ const Simulator& simulator, const typename Simulator::State& state,
+ typename Simulator::State& ket) {
std::complex eval = 0;
+ if (state_space.IsNull(ket) || ket.num_qubits() < state.num_qubits()) {
+ ket = state_space.Create(state.num_qubits());
+ if (state_space.IsNull(ket)) {
+ IO::errorf("not enough memory: is the number of qubits too large?\n");
+ return eval;
+ }
+ }
+
for (const auto& str : strings) {
if (str.ops.size() == 0) continue;
- ss.Copy(state, ket);
+ state_space.Copy(state, ket);
if (str.ops.size() == 1) {
const auto& op = str.ops[0];
@@ -70,7 +79,7 @@ std::complex ExpectationValue(
}
}
- eval += str.weight * ss.InnerProduct(state, ket);
+ eval += str.weight * state_space.InnerProduct(state, ket);
}
return eval;
@@ -88,7 +97,7 @@ std::complex ExpectationValue(
* @param state The state of the system.
* @return The computed expectation value.
*/
-template
+template
std::complex ExpectationValue(
const std::vector>& strings,
const Simulator& simulator, const typename Simulator::State& state) {
@@ -123,8 +132,8 @@ std::complex ExpectationValue(
break;
}
- auto matrix = CalculateFusedMatrix(fgate);
- auto r = simulator.ExpectationValue(fgate.qubits, matrix.data(), state);
+ auto r = simulator.ExpectationValue(
+ fgate.qubits, fgate.matrix.data(), state);
eval += str.weight * r;
}
}
diff --git a/lib/fuser.h b/lib/fuser.h
index 927349f1..93933975 100644
--- a/lib/fuser.h
+++ b/lib/fuser.h
@@ -50,6 +50,10 @@ struct GateFused {
* Ordered list of component gates.
*/
std::vector gates;
+ /**
+ * Fused gate matrix.
+ */
+ Matrix matrix;
};
/**
@@ -134,16 +138,14 @@ class Fuser {
/**
* Multiplies component gate matrices of a fused gate.
* @param gate Fused gate.
- * @return Matrix product of component matrices.
*/
-template
-inline Matrix CalculateFusedMatrix(const FusedGate& gate) {
- Matrix matrix;
- MatrixIdentity(unsigned{1} << gate.qubits.size(), matrix);
+template
+inline void CalculateFusedMatrix(FusedGate& gate) {
+ MatrixIdentity(unsigned{1} << gate.qubits.size(), gate.matrix);
for (auto pgate : gate.gates) {
if (gate.qubits.size() == pgate->qubits.size()) {
- MatrixMultiply(gate.qubits.size(), pgate->matrix, matrix);
+ MatrixMultiply(gate.qubits.size(), pgate->matrix, gate.matrix);
} else {
unsigned mask = 0;
@@ -157,11 +159,31 @@ inline Matrix CalculateFusedMatrix(const FusedGate& gate) {
}
MatrixMultiply(mask, pgate->qubits.size(), pgate->matrix,
- gate.qubits.size(), matrix);
+ gate.qubits.size(), gate.matrix);
}
}
+}
- return matrix;
+/**
+ * Multiplies component gate matrices for a range of fused gates.
+ * @param gbeg, gend The iterator range [gbeg, gend) of fused gates.
+ */
+template
+inline void CalculateFusedMatrices(Iterator gbeg, Iterator gend) {
+ for (auto g = gbeg; g != gend; ++g) {
+ if (g->kind != gate::kMeasurement) {
+ CalculateFusedMatrix(*g);
+ }
+ }
+}
+
+/**
+ * Multiplies component gate matrices for a vector of fused gates.
+ * @param gates The vector of fused gates.
+ */
+template
+inline void CalculateFusedMatrices(std::vector& gates) {
+ CalculateFusedMatrices(gates.begin(), gates.end());
}
} // namespace qsim
diff --git a/lib/fuser_basic.h b/lib/fuser_basic.h
index 345db25a..2fafb14a 100644
--- a/lib/fuser_basic.h
+++ b/lib/fuser_basic.h
@@ -53,31 +53,31 @@ class BasicGateFuser final : public Fuser {
/**
* Stores sets of gates that can be applied together. Only one- and
- * two-qubit gates will get fused. Gates fused with this method are not
- * multiplied together until ApplyFusedGate is called on the output.
- * To respect specific time boundaries while fusing gates, use the other
- * version of this method below.
+ * two-qubit gates will get fused. To respect specific time boundaries while
+ * fusing gates, use the other version of this method below.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by 'gates'.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gates The gates (or pointers to the gates) to be fused.
* Gate times of the gates that act on the same qubits should be ordered.
* Gates that are out of time order should not cross the time boundaries
* set by measurement gates.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(const Parameter& param,
- unsigned num_qubits,
- const std::vector& gates) {
- return FuseGates(param, num_qubits, gates.cbegin(), gates.cend(), {});
+ unsigned max_qubit1,
+ const std::vector& gates,
+ bool fuse_matrix = true) {
+ return FuseGates(
+ param, max_qubit1, gates.cbegin(), gates.cend(), {}, fuse_matrix);
}
/**
* Stores sets of gates that can be applied together. Only one- and
- * two-qubit gates will get fused. Gates fused with this method are not
- * multiplied together until ApplyFusedGate is called on the output.
+ * two-qubit gates will get fused.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by 'gates'.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gates The gates (or pointers to the gates) to be fused.
* Gate times of the gates that act on the same qubits should be ordered.
* Gates that are out of time order should not cross the time boundaries
@@ -85,45 +85,46 @@ class BasicGateFuser final : public Fuser {
* @param times_to_split_at Ordered list of time steps (boundaries) at which
* to separate fused gates. Each element of the output will contain gates
* from a single 'window' in this list.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
const Parameter& param,
- unsigned num_qubits, const std::vector& gates,
- const std::vector& times_to_split_at) {
- return FuseGates(param, num_qubits, gates.cbegin(), gates.cend(),
- times_to_split_at);
+ unsigned max_qubit1, const std::vector& gates,
+ const std::vector& times_to_split_at,
+ bool fuse_matrix = true) {
+ return FuseGates(param, max_qubit1, gates.cbegin(), gates.cend(),
+ times_to_split_at, fuse_matrix);
}
/**
* Stores sets of gates that can be applied together. Only one- and
- * two-qubit gates will get fused. Gates fused with this method are not
- * multiplied together until ApplyFusedGate is called on the output.
- * To respect specific time boundaries while fusing gates, use the other
- * version of this method below.
+ * two-qubit gates will get fused. To respect specific time boundaries while
+ * fusing gates, use the other version of this method below.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by gates.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gfirst, glast The iterator range [gfirst, glast) to fuse gates
* (or pointers to gates) in. Gate times of the gates that act on the same
* qubits should be ordered. Gates that are out of time order should not
* cross the time boundaries set by measurement gates.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
- const Parameter& param, unsigned num_qubits,
+ const Parameter& param, unsigned max_qubit1,
typename std::vector::const_iterator gfirst,
- typename std::vector::const_iterator glast) {
- return FuseGates(param, num_qubits, gfirst, glast, {});
+ typename std::vector::const_iterator glast,
+ bool fuse_matrix = true) {
+ return FuseGates(param, max_qubit1, gfirst, glast, {}, fuse_matrix);
}
/**
* Stores sets of gates that can be applied together. Only one- and
- * two-qubit gates will get fused. Gates fused with this method are not
- * multiplied together until ApplyFusedGate is called on the output.
+ * two-qubit gates will get fused.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by gates.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gfirst, glast The iterator range [gfirst, glast) to fuse gates
* (or pointers to gates) in. Gate times of the gates that act on the same
* qubits should be ordered. Gates that are out of time order should not
@@ -132,14 +133,16 @@ class BasicGateFuser final : public Fuser {
* @param times_to_split_at Ordered list of time steps (boundaries) at which
* to separate fused gates. Each element of the output will contain gates
* from a single 'window' in this list.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
- const Parameter& param, unsigned num_qubits,
+ const Parameter& param, unsigned max_qubit1,
typename std::vector::const_iterator gfirst,
typename std::vector::const_iterator glast,
- const std::vector& times_to_split_at) {
+ const std::vector& times_to_split_at,
+ bool fuse_matrix = true) {
std::vector gates_fused;
if (gfirst >= glast) return gates_fused;
@@ -159,7 +162,7 @@ class BasicGateFuser final : public Fuser {
std::vector gates_seq;
// Lattice of gates: qubits "hyperplane" and time direction.
- std::vector> gates_lat(num_qubits);
+ std::vector> gates_lat(max_qubit1);
// Current unfused gate.
auto gate_it = gfirst;
@@ -168,7 +171,7 @@ class BasicGateFuser final : public Fuser {
gates_seq.resize(0);
gates_seq.reserve(num_gates);
- for (unsigned k = 0; k < num_qubits; ++k) {
+ for (unsigned k = 0; k < max_qubit1; ++k) {
gates_lat[k].resize(0);
gates_lat[k].reserve(128);
}
@@ -179,9 +182,7 @@ class BasicGateFuser final : public Fuser {
if (gate.time > times[l]) break;
- if (GateIsOutOfOrder(gate.time, gate.qubits, gates_lat)
- || GateIsOutOfOrder(gate.time, gate.controlled_by, gates_lat)) {
- IO::errorf("gate is out of time order.\n");
+ if (!ValidateGate(gate, max_qubit1, gates_lat)) {
gates_fused.resize(0);
return gates_fused;
}
@@ -190,7 +191,7 @@ class BasicGateFuser final : public Fuser {
auto& mea_gates_at_time = measurement_gates[gate.time];
if (mea_gates_at_time.size() == 0) {
gates_seq.push_back(&gate);
- mea_gates_at_time.reserve(num_qubits);
+ mea_gates_at_time.reserve(max_qubit1);
}
mea_gates_at_time.push_back(&gate);
@@ -214,7 +215,7 @@ class BasicGateFuser final : public Fuser {
}
}
- std::vector last(num_qubits, 0);
+ std::vector last(max_qubit1, 0);
const RGate* delayed_measurement_gate = nullptr;
@@ -243,11 +244,11 @@ class BasicGateFuser final : public Fuser {
}
gates_fused.push_back({pgate->kind, pgate->time, pgate->qubits,
- pgate, {pgate}});
+ pgate, {pgate}, {}});
} else if (pgate->qubits.size() == 1) {
unsigned q0 = pgate->qubits[0];
- GateFused gate_f = {pgate->kind, pgate->time, {q0}, pgate, {}};
+ GateFused gate_f = {pgate->kind, pgate->time, {q0}, pgate, {}, {}};
last[q0] = Advance(last[q0], gates_lat[q0], gate_f.gates);
gate_f.gates.push_back(gates_lat[q0][last[q0]]);
@@ -260,7 +261,8 @@ class BasicGateFuser final : public Fuser {
if (Done(last[q0], pgate->time, gates_lat[q0])) continue;
- GateFused gate_f = {pgate->kind, pgate->time, {q0, q1}, pgate, {}};
+ GateFused gate_f =
+ {pgate->kind, pgate->time, {q0, q1}, pgate, {}, {}};
do {
last[q0] = Advance(last[q0], gates_lat[q0], gate_f.gates);
@@ -277,7 +279,7 @@ class BasicGateFuser final : public Fuser {
}
}
- for (unsigned q = 0; q < num_qubits; ++q) {
+ for (unsigned q = 0; q < max_qubit1; ++q) {
auto l = last[q];
if (l == gates_lat[q].size()) continue;
@@ -290,7 +292,7 @@ class BasicGateFuser final : public Fuser {
const auto& mea_gates_at_time = measurement_gates[pgate->time];
- GateFused gate_f = {pgate->kind, pgate->time, {}, pgate, {}};
+ GateFused gate_f = {pgate->kind, pgate->time, {}, pgate, {}, {}};
gate_f.gates.reserve(mea_gates_at_time.size());
// Fuse measurement gates with equal times.
@@ -307,6 +309,14 @@ class BasicGateFuser final : public Fuser {
if (gate_it == glast) break;
}
+ if (fuse_matrix) {
+ for (auto& gate_f : gates_fused) {
+ if (gate_f.kind != gate::kMeasurement && gate_f.kind != gate::kDecomp) {
+ CalculateFusedMatrix(gate_f);
+ }
+ }
+ }
+
return gates_fused;
}
@@ -338,7 +348,7 @@ class BasicGateFuser final : public Fuser {
std::vector& gates_fused) {
auto pgate = gates_lat[q][k];
- GateFused gate_f = {pgate->kind, pgate->time, {q}, pgate, {}};
+ GateFused gate_f = {pgate->kind, pgate->time, {q}, pgate, {}, {}};
gate_f.gates.push_back(pgate);
k = Advance(k + 1, gates_lat[q], gate_f.gates);
@@ -348,17 +358,34 @@ class BasicGateFuser final : public Fuser {
return k;
}
- template
- static bool GateIsOutOfOrder(unsigned time,
- const std::vector& qubits,
- const GatesLat& gates_lat) {
- for (unsigned q : qubits) {
- if (!gates_lat[q].empty() && time <= gates_lat[q].back()->time) {
- return true;
+ template
+ static bool ValidateGate(const Gate2& gate, unsigned max_qubit1,
+ const GatesLat& gates_lat) {
+ for (unsigned q : gate.qubits) {
+ if (q >= max_qubit1) {
+ IO::errorf("fuser: gate qubit %u is out of range "
+ "(should be smaller than %u).\n", q, max_qubit1);
+ return false;
+ }
+ if (!gates_lat[q].empty() && gate.time <= gates_lat[q].back()->time) {
+ IO::errorf("fuser: gate at time %u is out of time order.\n", gate.time);
+ return false;
+ }
+ }
+
+ for (unsigned q : gate.controlled_by) {
+ if (q >= max_qubit1) {
+ IO::errorf("fuser: gate qubit %u is out of range "
+ "(should be smaller than %u).\n", q, max_qubit1);
+ return false;
+ }
+ if (!gates_lat[q].empty() && gate.time <= gates_lat[q].back()->time) {
+ IO::errorf("fuser: gate at time %u is out of time order.\n", gate.time);
+ return false;
}
}
- return false;
+ return true;
}
};
diff --git a/lib/fuser_mqubit.h b/lib/fuser_mqubit.h
index 66273893..8ddb029e 100644
--- a/lib/fuser_mqubit.h
+++ b/lib/fuser_mqubit.h
@@ -152,31 +152,31 @@ class MultiQubitGateFuser final : public Fuser {
};
/**
- * Stores sets of gates that can be applied together. Note that
- * gates fused with this method are not multiplied together until
- * ApplyFusedGate is called on the output. To respect specific time
- * boundaries while fusing gates, use the other version of this method below.
+ * Stores sets of gates that can be applied together. To respect specific
+ * time boundaries while fusing gates, use the other version of this method
+ * below.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by 'gates'.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gates The gates (or pointers to the gates) to be fused.
* Gate times of the gates that act on the same qubits should be ordered.
* Gates that are out of time order should not cross the time boundaries
* set by measurement gates.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(const Parameter& param,
- unsigned num_qubits,
- const std::vector& gates) {
- return FuseGates(param, num_qubits, gates.cbegin(), gates.cend(), {});
+ unsigned max_qubit1,
+ const std::vector& gates,
+ bool fuse_matrix = true) {
+ return FuseGates(
+ param, max_qubit1, gates.cbegin(), gates.cend(), {}, fuse_matrix);
}
/**
- * Stores sets of gates that can be applied together. Note that
- * gates fused with this method are not multiplied together until
- * ApplyFusedGate is called on the output.
+ * Stores sets of gates that can be applied together.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by 'gates'.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gates The gates (or pointers to the gates) to be fused.
* Gate times of the gates that act on the same qubits should be ordered.
* Gates that are out of time order should not cross the time boundaries
@@ -184,44 +184,45 @@ class MultiQubitGateFuser final : public Fuser {
* @param times_to_split_at Ordered list of time steps (boundaries) at which
* to separate fused gates. Each element of the output will contain gates
* from a single 'window' in this list.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
const Parameter& param,
- unsigned num_qubits, const std::vector& gates,
- const std::vector& times_to_split_at) {
- return FuseGates(param, num_qubits, gates.cbegin(), gates.cend(),
- times_to_split_at);
+ unsigned max_qubit1, const std::vector& gates,
+ const std::vector& times_to_split_at,
+ bool fuse_matrix = true) {
+ return FuseGates(param, max_qubit1, gates.cbegin(), gates.cend(),
+ times_to_split_at, fuse_matrix);
}
/**
- * Stores sets of gates that can be applied together. Note that
- * gates fused with this method are not multiplied together until
- * ApplyFusedGate is called on the output. To respect specific time
- * boundaries while fusing gates, use the other version of this method below.
+ * Stores sets of gates that can be applied together. To respect specific
+ * time boundaries while fusing gates, use the other version of this method
+ * below.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by gates.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gfirst, glast The iterator range [gfirst, glast) to fuse gates
* (or pointers to gates) in. Gate times of the gates that act on the same
* qubits should be ordered. Gates that are out of time order should not
* cross the time boundaries set by measurement gates.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
- const Parameter& param, unsigned num_qubits,
+ const Parameter& param, unsigned max_qubit1,
typename std::vector::const_iterator gfirst,
- typename std::vector::const_iterator glast) {
- return FuseGates(param, num_qubits, gfirst, glast, {});
+ typename std::vector::const_iterator glast,
+ bool fuse_matrix = true) {
+ return FuseGates(param, max_qubit1, gfirst, glast, {}, fuse_matrix);
}
/**
- * Stores sets of gates that can be applied together. Note that
- * gates fused with this method are not multiplied together until
- * ApplyFusedGate is called on the output.
+ * Stores sets of gates that can be applied together.
* @param param Options for gate fusion.
- * @param num_qubits The number of qubits acted on by gates.
+ * @param max_qubit1 The maximum qubit index (plus one) acted on by 'gates'.
* @param gfirst, glast The iterator range [gfirst, glast) to fuse gates
* (or pointers to gates) in. Gate times of the gates that act on the same
* qubits should be ordered. Gates that are out of time order should not
@@ -230,14 +231,16 @@ class MultiQubitGateFuser final : public Fuser {
* @param times_to_split_at Ordered list of time steps (boundaries) at which
* to separate fused gates. Each element of the output will contain gates
* from a single 'window' in this list.
+ * @param fuse_matrix If true, multiply gate matrices together.
* @return A vector of fused gate objects. Each element is a set of gates
* acting on a specific pair of qubits which can be applied as a group.
*/
static std::vector FuseGates(
- const Parameter& param, unsigned num_qubits,
+ const Parameter& param, unsigned max_qubit1,
typename std::vector::const_iterator gfirst,
typename std::vector::const_iterator glast,
- const std::vector& times_to_split_at) {
+ const std::vector& times_to_split_at,
+ bool fuse_matrix = true) {
std::vector fused_gates;
if (gfirst >= glast) return fused_gates;
@@ -250,7 +253,7 @@ class MultiQubitGateFuser final : public Fuser {
auto epochs =
Base::MergeWithMeasurementTimes(gfirst, glast, times_to_split_at);
- LinkManager link_manager(num_qubits * num_gates);
+ LinkManager link_manager(max_qubit1 * num_gates);
// Auxillary data structures.
// Sequence of intermediate fused gates.
@@ -258,10 +261,10 @@ class MultiQubitGateFuser final : public Fuser {
// Gate "lattice".
std::vector gates_lat;
// Sequences of intermediate fused gates ordered by gate size.
- std::vector> fgates(num_qubits + 1);
+ std::vector> fgates(max_qubit1 + 1);
gates_seq.reserve(num_gates);
- gates_lat.reserve(num_qubits);
+ gates_lat.reserve(max_qubit1);
Scratch scratch;
@@ -274,10 +277,10 @@ class MultiQubitGateFuser final : public Fuser {
scratch.stack.reserve(8);
Stat stat;
- stat.num_gates.resize(num_qubits + 1, 0);
+ stat.num_gates.resize(max_qubit1 + 1, 0);
unsigned max_fused_size = std::min(unsigned{6}, param.max_fused_size);
- max_fused_size = std::min(max_fused_size, num_qubits);
+ max_fused_size = std::min(max_fused_size, max_qubit1);
auto gate_it = gfirst;
@@ -285,9 +288,9 @@ class MultiQubitGateFuser final : public Fuser {
for (std::size_t l = 0; l < epochs.size(); ++l) {
gates_seq.resize(0);
gates_lat.resize(0);
- gates_lat.resize(num_qubits, nullptr);
+ gates_lat.resize(max_qubit1, nullptr);
- for (unsigned i = 0; i <= num_qubits; ++i) {
+ for (unsigned i = 0; i <= max_qubit1; ++i) {
fgates[i].resize(0);
}
@@ -300,9 +303,7 @@ class MultiQubitGateFuser final : public Fuser {
if (gate.time > epochs[l]) break;
- if (GateIsOutOfOrder(gate.time, gate.qubits, gates_lat)
- || GateIsOutOfOrder(gate.time, gate.controlled_by, gates_lat)) {
- IO::errorf("gate is out of time order.\n");
+ if (!ValidateGate(gate, max_qubit1, gates_lat)) {
fused_gates.resize(0);
return fused_gates;
}
@@ -317,8 +318,8 @@ class MultiQubitGateFuser final : public Fuser {
gates_seq.push_back({&gate, {}, {}, {}, 0, kMeaCnt});
last_mea_gate = &gates_seq.back();
- last_mea_gate->qubits.reserve(num_qubits);
- last_mea_gate->links.reserve(num_qubits);
+ last_mea_gate->qubits.reserve(max_qubit1);
+ last_mea_gate->links.reserve(max_qubit1);
++stat.num_fused_mea_gates;
}
@@ -391,20 +392,53 @@ class MultiQubitGateFuser final : public Fuser {
if (max_fused_size > 2) {
FuseGateSequences(
- max_fused_size, num_qubits, scratch, gates_seq, stat, fused_gates);
+ max_fused_size, max_qubit1, scratch, gates_seq, stat, fused_gates);
} else {
+ unsigned prev_time = 0;
+
+ std::vector orphaned_gates;
+ orphaned_gates.reserve(max_qubit1);
+
for (auto& fgate : gates_seq) {
- if (fgate.gates.size() > 0) {
- // Assume fgate.qubits (gate.qubits) are sorted.
- fused_gates.push_back({fgate.parent->kind, fgate.parent->time,
- std::move(fgate.qubits), fgate.parent,
- std::move(fgate.gates)});
-
- if (fgate.visited != kMeaCnt) {
- ++stat.num_fused_gates;
+ if (fgate.gates.size() == 0) continue;
+
+ if (prev_time != fgate.parent->time) {
+ if (orphaned_gates.size() > 0) {
+ FuseOrphanedGates(
+ max_fused_size, stat, orphaned_gates, fused_gates);
+ orphaned_gates.resize(0);
}
+
+ prev_time = fgate.parent->time;
+ }
+
+ if (fgate.qubits.size() == 1 && max_fused_size > 1
+ && fgate.visited != kMeaCnt && !fgate.parent->unfusible) {
+ orphaned_gates.push_back(&fgate);
+ continue;
+ }
+
+ // Assume fgate.qubits (gate.qubits) are sorted.
+ fused_gates.push_back({fgate.parent->kind, fgate.parent->time,
+ std::move(fgate.qubits), fgate.parent,
+ std::move(fgate.gates), {}});
+
+ if (fgate.visited != kMeaCnt) {
+ ++stat.num_fused_gates;
}
}
+
+ if (orphaned_gates.size() > 0) {
+ FuseOrphanedGates(max_fused_size, stat, orphaned_gates, fused_gates);
+ }
+ }
+ }
+
+ if (fuse_matrix) {
+ for (auto& fgate : fused_gates) {
+ if (fgate.kind != gate::kMeasurement && fgate.kind != gate::kDecomp) {
+ CalculateFusedMatrix(fgate);
+ }
}
}
@@ -448,13 +482,13 @@ class MultiQubitGateFuser final : public Fuser {
//
// max_fused_size = 6: _-_-_
static void FuseGateSequences(unsigned max_fused_size,
- unsigned num_qubits, Scratch& scratch,
+ unsigned max_qubit1, Scratch& scratch,
std::vector& gates_seq, Stat& stat,
std::vector& fused_gates) {
unsigned prev_time = 0;
std::vector orphaned_gates;
- orphaned_gates.reserve(num_qubits);
+ orphaned_gates.reserve(max_qubit1);
for (auto& fgate : gates_seq) {
if (prev_time != fgate.parent->time) {
@@ -471,14 +505,14 @@ class MultiQubitGateFuser final : public Fuser {
if (fgate.visited == kMeaCnt || fgate.qubits.size() >= max_fused_size
|| fgate.parent->unfusible) {
if (fgate.visited != kMeaCnt) {
- ++stat.num_fused_gates;
+ ++stat.num_fused_gates;
}
fgate.visited = kFinal;
fused_gates.push_back({fgate.parent->kind, fgate.parent->time,
std::move(fgate.qubits), fgate.parent,
- std::move(fgate.gates)});
+ std::move(fgate.gates), {}});
continue;
}
@@ -503,7 +537,7 @@ class MultiQubitGateFuser final : public Fuser {
fused_gates.push_back({fgate->parent->kind, fgate->parent->time,
std::move(fgate->qubits), fgate->parent,
- std::move(fgate->gates)});
+ std::move(fgate->gates), {}});
++stat.num_fused_gates;
}
@@ -564,7 +598,7 @@ class MultiQubitGateFuser final : public Fuser {
fused_gates.push_back({ogate1->parent->kind, ogate1->parent->time,
std::move(ogate1->qubits), ogate1->parent,
- std::move(ogate1->gates)});
+ std::move(ogate1->gates), {}});
++stat.num_fused_gates;
}
@@ -921,7 +955,9 @@ class MultiQubitGateFuser final : public Fuser {
if (ln != nullptr && rn != nullptr) {
return R()(ln->val->parent->time, rn->val->parent->time);
} else {
- return ln != nullptr || rn == nullptr;
+ // nullptrs are larger than everything else and
+ // equivalent among each other.
+ return ln != nullptr;
}
});
@@ -963,7 +999,7 @@ class MultiQubitGateFuser final : public Fuser {
static void PrintStat(unsigned verbosity, const Stat& stat,
const std::vector& fused_gates) {
- if (verbosity == 0) return;
+ if (verbosity < 3) return;
if (stat.num_controlled_gates > 0) {
IO::messagef("%lu controlled gates\n", stat.num_controlled_gates);
@@ -992,10 +1028,10 @@ class MultiQubitGateFuser final : public Fuser {
IO::messagef(" gates are fused into %lu gates\n", stat.num_fused_gates);
- if (verbosity == 1) return;
+ if (verbosity < 5) return;
IO::messagef("fused gate qubits:\n");
- for (const auto g : fused_gates) {
+ for (const auto& g : fused_gates) {
IO::messagef("%6u ", g.parent->time);
if (g.parent->kind == gate::kMeasurement) {
IO::messagef("m");
@@ -1016,17 +1052,36 @@ class MultiQubitGateFuser final : public Fuser {
}
}
- template
- static bool GateIsOutOfOrder(unsigned time,
- const std::vector& qubits,
- const GatesLat& gates_lat) {
- for (unsigned q : qubits) {
- if (gates_lat[q] != nullptr && time <= gates_lat[q]->val->parent->time) {
- return true;
+ template
+ static bool ValidateGate(const Gate2& gate, unsigned max_qubit1,
+ const GatesLat& gates_lat) {
+ for (unsigned q : gate.qubits) {
+ if (q >= max_qubit1) {
+ IO::errorf("fuser: gate qubit %u is out of range "
+ "(should be smaller than %u).\n", q, max_qubit1);
+ return false;
+ }
+ if (gates_lat[q] != nullptr
+ && gate.time <= gates_lat[q]->val->parent->time) {
+ IO::errorf("fuser: gate at time %u is out of time order.\n", gate.time);
+ return false;
+ }
+ }
+
+ for (unsigned q : gate.controlled_by) {
+ if (q >= max_qubit1) {
+ IO::errorf("fuser: gate qubit %u is out of range "
+ "(should be smaller than %u).\n", q, max_qubit1);
+ return false;
+ }
+ if (gates_lat[q] != nullptr
+ && gate.time <= gates_lat[q]->val->parent->time) {
+ IO::errorf("fuser: gate at time %u is out of time order.\n", gate.time);
+ return false;
}
}
- return false;
+ return true;
}
};
diff --git a/lib/gate_appl.h b/lib/gate_appl.h
index 59b60082..8601e6f2 100644
--- a/lib/gate_appl.h
+++ b/lib/gate_appl.h
@@ -136,13 +136,12 @@ template
inline void ApplyFusedGate(const Simulator& simulator, const Gate& gate,
typename Simulator::State& state) {
if (gate.kind != gate::kMeasurement) {
- using fp_type = typename Simulator::fp_type;
- auto matrix = CalculateFusedMatrix(gate);
if (gate.parent->controlled_by.size() == 0) {
- simulator.ApplyGate(gate.qubits, matrix.data(), state);
+ simulator.ApplyGate(gate.qubits, gate.matrix.data(), state);
} else {
simulator.ApplyControlledGate(gate.qubits, gate.parent->controlled_by,
- gate.parent->cmask, matrix.data(), state);
+ gate.parent->cmask, gate.matrix.data(),
+ state);
}
}
}
@@ -160,9 +159,9 @@ template
inline void ApplyFusedGateDagger(const Simulator& simulator, const Gate& gate,
typename Simulator::State& state) {
if (gate.kind != gate::kMeasurement) {
- using fp_type = typename Simulator::fp_type;
- auto matrix = CalculateFusedMatrix(gate);
+ auto matrix = gate.matrix;
MatrixDagger(unsigned{1} << gate.qubits.size(), matrix);
+
if (gate.parent->controlled_by.size() == 0) {
simulator.ApplyGate(gate.qubits, matrix.data(), state);
} else {
diff --git a/lib/gates_qsim.h b/lib/gates_qsim.h
index 7690c2b6..9b64cf98 100644
--- a/lib/gates_qsim.h
+++ b/lib/gates_qsim.h
@@ -46,6 +46,8 @@ enum GateKind {
kGateIS, // iSwap
kGateFS, // fSim
kGateCP, // control phase
+ kGateMatrix1, // One-qubit matrix gate.
+ kGateMatrix2, // Two-qubit matrix gate.
kDecomp = gate::kDecomp,
kMeasurement = gate::kMeasurement,
};
@@ -317,6 +319,24 @@ struct GateS {
}
};
+/**
+ * A one-qubit gate defined entirely by its matrix.
+ */
+template
+struct GateMatrix1 {
+ static constexpr GateKind kind = kGateMatrix1;
+ static constexpr char name[] = "mat1";
+ static constexpr unsigned num_qubits = 1;
+ static constexpr bool symmetric = true;
+
+ static GateQSim Create(unsigned time, unsigned q0,
+ const Matrix& m) {
+ auto m2 = m;
+ return
+ CreateGate, GateMatrix1>(time, {q0}, std::move(m2));
+ }
+};
+
// Two-qubit gates:
/**
@@ -566,6 +586,29 @@ struct GateCP {
}
};
+/**
+ * A two-qubit gate defined entirely by its matrix.
+ */
+template
+struct GateMatrix2 {
+ static constexpr GateKind kind = kGateMatrix2;
+ static constexpr char name[] = "mat2";
+ static constexpr unsigned num_qubits = 2;
+ static constexpr bool symmetric = false;
+
+ template >
+ static GateQSim Create(
+ unsigned time, unsigned q0, unsigned q1, M&& m) {
+ return CreateGate, GateMatrix2>(time, {q1, q0},
+ std::forward(m));
+ }
+
+ static schmidt_decomp_type SchmidtDecomp(fp_type phi) {
+ // Not implemented.
+ return schmidt_decomp_type{};
+ }
+};
+
template
inline schmidt_decomp_type GetSchmidtDecomp(
GateKind kind, const std::vector& params) {
diff --git a/lib/hybrid.h b/lib/hybrid.h
index 0ce98b63..d0189efc 100644
--- a/lib/hybrid.h
+++ b/lib/hybrid.h
@@ -40,6 +40,7 @@ struct HybridSimulator final {
// Note that one can use "struct GateHybrid : public Gate {" in C++17.
struct GateHybrid {
using GateKind = HybridSimulator::GateKind;
+ using fp_type = HybridSimulator::fp_type;
GateKind kind;
unsigned time;
@@ -554,7 +555,13 @@ struct HybridSimulator final {
const Simulator& simulator,
typename Simulator::State& state) {
for (std::size_t i = i0; i < i1; ++i) {
- ApplyFusedGate(simulator, gates[i], state);
+ if (gates[i].matrix.size() > 0) {
+ ApplyFusedGate(simulator, gates[i], state);
+ } else {
+ auto gate = gates[i];
+ CalculateFusedMatrix(gate);
+ ApplyFusedGate(simulator, gate, state);
+ }
}
}
diff --git a/lib/matrix.h b/lib/matrix.h
index f9725eca..e126a02d 100644
--- a/lib/matrix.h
+++ b/lib/matrix.h
@@ -92,6 +92,39 @@ inline void MatrixMultiply(
}
}
+/**
+ * Multiplies two gate matrices of equal size: m2 = m1^\dagger m2.
+ * @q Number of gate qubits. The number of matrix rows (columns) is 2^q.
+ * @m1 Matrix m1.
+ * @m2 Input matrix m2. Output product of matrices m2 = m1 m2.
+ */
+template
+inline void MatrixDaggerMultiply(
+ unsigned q, const Matrix& m1, Matrix& m2) {
+ Matrix mt = m2;
+ unsigned n = unsigned{1} << q;
+
+ for (unsigned i = 0; i < n; ++i) {
+ for (unsigned j = 0; j < n; ++j) {
+ fp_type2 re = 0;
+ fp_type2 im = 0;
+
+ for (unsigned k = 0; k < n; ++k) {
+ fp_type2 r1 = m1[2 * (n * k + i)];
+ fp_type2 i1 = m1[2 * (n * k + i) + 1];
+ fp_type2 r2 = mt[2 * (n * k + j)];
+ fp_type2 i2 = mt[2 * (n * k + j) + 1];
+
+ re += r1 * r2 + i1 * i2;
+ im += r1 * i2 - i1 * r2;
+ }
+
+ m2[2 * (n * i + j)] = re;
+ m2[2 * (n * i + j) + 1] = im;
+ }
+ }
+}
+
/**
* Multiplies two gate matrices: m2 = m1 m2. The size of m1 should not exceed
* the size of m2.
diff --git a/lib/mps_simulator.h b/lib/mps_simulator.h
index ae053690..8fbcbae1 100644
--- a/lib/mps_simulator.h
+++ b/lib/mps_simulator.h
@@ -35,11 +35,12 @@ namespace mps {
/**
* Truncated Matrix Product State (MPS) circuit simulator w/ vectorization.
*/
-template
+template
class MPSSimulator final {
public:
- using MPSStateSpace_ = MPSStateSpace;
+ using MPSStateSpace_ = MPSStateSpace;
using State = typename MPSStateSpace_::MPS;
+ using fp_type = typename MPSStateSpace_::fp_type;
using Complex = std::complex;
using Matrix =
@@ -219,7 +220,7 @@ class MPSSimulator final {
block_0.fill(Complex(0, 0));
const auto keep_cols = (svd_u.cols() > bond_dim) ? bond_dim : svd_u.cols();
block_0.block(0, 0, svd_u.rows(), keep_cols).noalias() =
- svd_u(Eigen::all, Eigen::seq(0, keep_cols - 1));
+ svd_u(Eigen::indexing::all, Eigen::seq(0, keep_cols - 1));
// Place row product of S V into scratch to truncate and then B1.
MatrixMap svd_v((Complex*)(raw_state + end), p, 2 * m_dim);
@@ -232,7 +233,8 @@ class MPSSimulator final {
for (unsigned i = 0; i < keep_rows; ++i) {
svd_v.row(i) *= s_vector(i);
}
- block_1.block(0, 0, keep_rows, svd_v.cols()).noalias() = svd_v(row_seq, Eigen::all);
+ block_1.block(0, 0, keep_rows, svd_v.cols()).noalias() =
+ svd_v(row_seq, Eigen::indexing::all);
}
For for_;
diff --git a/lib/mps_statespace.h b/lib/mps_statespace.h
index 888d4d58..9b3acf31 100644
--- a/lib/mps_statespace.h
+++ b/lib/mps_statespace.h
@@ -26,8 +26,10 @@
#include
#include
#include
+#include
#include "../eigen/Eigen/Dense"
+#include "../eigen/unsupported/Eigen/CXX11/Tensor"
namespace qsim {
@@ -51,10 +53,11 @@ inline void free(void* ptr) {
* Class containing context and routines for fixed bond dimension
* truncated Matrix Product State (MPS) simulation.
*/
-template
+template
class MPSStateSpace {
private:
public:
+ using fp_type = FP;
using Pointer = std::unique_ptr;
using Complex = std::complex;
@@ -179,8 +182,8 @@ class MPSStateSpace {
fp_type* state2_raw = state2.get();
// Contract leftmost blocks together, store result in state1 scratch.
- ConstMatrixMap top((Complex*) state2_raw, 2, bond_dim);
- ConstMatrixMap bot((Complex*) state1_raw, 2, bond_dim);
+ ConstMatrixMap top((Complex*)state2_raw, 2, bond_dim);
+ ConstMatrixMap bot((Complex*)state1_raw, 2, bond_dim);
MatrixMap partial_contract((Complex*)(state1_raw + end), bond_dim,
bond_dim);
MatrixMap partial_contract2(
@@ -231,6 +234,326 @@ class MPSStateSpace {
return partial_contract(0, 0);
}
+ // Compute the 2x2 1-RDM of state on index. Result written to rdm.
+ // Requires: scratch and rdm to be allocated.
+ static void ReduceDensityMatrix(MPS& state, MPS& scratch, int index,
+ fp_type* rdm) {
+ const auto num_qubits = state.num_qubits();
+ const auto bond_dim = state.bond_dim();
+ const auto end = Size(state);
+ const bool last_index = (index == num_qubits - 1);
+ const auto right_dim = (last_index ? 1 : bond_dim);
+ auto offset = 0;
+ fp_type* state_raw = state.get();
+ fp_type* scratch_raw = scratch.get();
+ fp_type* state_raw_workspace = state_raw + end + 2 * bond_dim * bond_dim;
+ fp_type* scratch_raw_workspace =
+ scratch_raw + end + 2 * bond_dim * bond_dim;
+
+ Copy(state, scratch);
+
+ // Contract leftmost blocks together, store result in state scratch.
+ ConstMatrixMap top((Complex*)scratch_raw, 2, bond_dim);
+ ConstMatrixMap bot((Complex*)state_raw, 2, bond_dim);
+ MatrixMap partial_contract((Complex*)(state_raw + end), bond_dim, bond_dim);
+ MatrixMap partial_contract2((Complex*)(state_raw_workspace), bond_dim,
+ 2 * bond_dim);
+
+ partial_contract.setZero();
+ partial_contract(0, 0) = 1;
+ if (index > 0) {
+ partial_contract.noalias() = top.adjoint() * bot;
+ }
+
+ // Contract all internal blocks together.
+ for (unsigned i = 1; i < index; ++i) {
+ offset = GetBlockOffset(state, i);
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw_workspace), bond_dim, 2 * bond_dim);
+
+ // Merge bot into left boundary merged tensor.
+ new (&bot) ConstMatrixMap((Complex*)(state_raw + offset), bond_dim,
+ 2 * bond_dim);
+ partial_contract2.noalias() = partial_contract * bot;
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw_workspace), 2 * bond_dim, bond_dim);
+
+ // Merge top into partial_contract2.
+ new (&top) ConstMatrixMap((Complex*)(scratch_raw + offset), 2 * bond_dim,
+ bond_dim);
+ partial_contract.noalias() = top.adjoint() * partial_contract2;
+ }
+
+ // The [bond_dim, bond_dim] block in state_raw now contains the contraction
+ // up to, but not including index.
+ // Contract rightmost blocks.
+ offset = GetBlockOffset(state, num_qubits - 1);
+ new (&top) ConstMatrixMap((Complex*)(scratch_raw + offset), bond_dim, 2);
+ new (&bot) ConstMatrixMap((Complex*)(state_raw + offset), bond_dim, 2);
+ new (&partial_contract)
+ MatrixMap((Complex*)(scratch_raw + end), bond_dim, bond_dim);
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw_workspace), bond_dim, 2 * bond_dim);
+
+ partial_contract.setZero();
+ partial_contract(0, 0) = 1;
+ if (index < num_qubits - 1) {
+ partial_contract.noalias() = top * bot.adjoint();
+ }
+
+ for (unsigned i = num_qubits - 2; i > index; --i) {
+ offset = GetBlockOffset(state, i);
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw_workspace), 2 * bond_dim, bond_dim);
+
+ // Merge bot into left boundary merged tensor.
+ new (&bot) ConstMatrixMap((Complex*)(state_raw + offset), 2 * bond_dim,
+ bond_dim);
+ partial_contract2.noalias() = bot * partial_contract.adjoint();
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw_workspace), bond_dim, 2 * bond_dim);
+
+ // Merge top into partial_contract2.
+ new (&top) ConstMatrixMap((Complex*)(scratch_raw + offset), bond_dim,
+ 2 * bond_dim);
+ // [bd, bd] = [bd, 2bd] @ [bd, 2bd]
+ partial_contract.noalias() = top * partial_contract2.adjoint();
+ }
+
+ // The [bond_dim, bond_dim] block in scratch_raw now contains the
+ // contraction down from the end, but not including the index. Begin final
+ // contraction steps.
+
+ // Get leftmost [bd, bd] contraction and contract with top.
+
+ offset = GetBlockOffset(state, index);
+ new (&partial_contract)
+ MatrixMap((Complex*)(state_raw + end), bond_dim, bond_dim);
+ new (&top)
+ ConstMatrixMap((Complex*)(state_raw + offset), bond_dim, 2 * right_dim);
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw_workspace), bond_dim, 2 * right_dim);
+ partial_contract2.noalias() = partial_contract * top.conjugate();
+ // copy the bottom contraction scratch_raw to state_raw to save space.
+ memcpy(state_raw + end, scratch_raw + end,
+ bond_dim * bond_dim * 2 * sizeof(fp_type));
+
+ // Contract top again for correct shape.
+ fp_type* contract3_target = (last_index ? rdm : scratch_raw);
+ MatrixMap partial_contract3((Complex*)contract3_target, 2 * right_dim,
+ 2 * right_dim);
+ partial_contract3.noalias() = top.transpose() * partial_contract2;
+
+ // If we are contracting the last index, all the needed transforms are done.
+ if (last_index) {
+ return;
+ }
+
+ // Conduct final tensor contraction operations. Cannot be easily compiled to
+ // matmul.
+ const Eigen::TensorMap>
+ t_4d((Complex*)scratch_raw, 2, bond_dim, 2, bond_dim);
+ const Eigen::TensorMap>
+ t_2d((Complex*)(state_raw + end), bond_dim, bond_dim);
+
+ const Eigen::array, 2> product_dims = {
+ Eigen::IndexPair(1, 0),
+ Eigen::IndexPair(3, 1),
+ };
+ Eigen::TensorMap> out(
+ (Complex*)rdm, 2, 2);
+ out = t_4d.contract(t_2d, product_dims);
+ }
+
+ // Draw a single bitstring sample from state using scratch and scratch2
+ // as working space.
+ static void SampleOnce(MPS& state, MPS& scratch, MPS& scratch2,
+ std::mt19937* random_gen, std::vector* sample) {
+ // TODO: carefully profile with perf and optimize temp storage
+ // locations for cache friendliness.
+ const auto bond_dim = state.bond_dim();
+ const auto num_qubits = state.num_qubits();
+ const auto end = Size(state);
+ const auto left_frontier_offset = GetBlockOffset(state, num_qubits + 1);
+ std::default_random_engine generator;
+ fp_type* state_raw = state.get();
+ fp_type* scratch_raw = scratch.get();
+ fp_type* scratch2_raw = scratch2.get();
+ fp_type rdm[8];
+
+ sample->reserve(num_qubits);
+ Copy(state, scratch);
+ Copy(state, scratch2);
+
+ // Store prefix contractions in scratch2.
+ auto offset = GetBlockOffset(state, num_qubits - 1);
+ ConstMatrixMap top((Complex*)(state_raw + offset), bond_dim, 2);
+ ConstMatrixMap bot((Complex*)(scratch_raw + offset), bond_dim, 2);
+ MatrixMap partial_contract((Complex*)(scratch2_raw + offset), bond_dim,
+ bond_dim);
+ MatrixMap partial_contract2((Complex*)(scratch_raw + end), bond_dim,
+ 2 * bond_dim);
+ partial_contract.noalias() = top * bot.adjoint();
+
+ for (unsigned i = num_qubits - 2; i > 0; --i) {
+ offset = GetBlockOffset(state, i);
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw + end), 2 * bond_dim, bond_dim);
+
+ // Merge bot into left boundary merged tensor.
+ new (&bot) ConstMatrixMap((Complex*)(scratch_raw + offset), 2 * bond_dim,
+ bond_dim);
+ partial_contract2.noalias() = bot * partial_contract.adjoint();
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw + end), bond_dim, 2 * bond_dim);
+
+ // Merge top into partial_contract2.
+ new (&top) ConstMatrixMap((Complex*)(state_raw + offset), bond_dim,
+ 2 * bond_dim);
+
+ // merge into partial_contract -> scracth2_raw.
+ new (&partial_contract)
+ MatrixMap((Complex*)(scratch2_raw + offset), bond_dim, bond_dim);
+ partial_contract.noalias() = top * partial_contract2.adjoint();
+ }
+
+ // Compute RDM-0 and draw first sample.
+ offset = GetBlockOffset(state, 1);
+ new (&top) ConstMatrixMap((Complex*)state_raw, 2, bond_dim);
+ new (&bot) ConstMatrixMap((Complex*)scratch_raw, 2, bond_dim);
+ new (&partial_contract)
+ MatrixMap((Complex*)(scratch2_raw + offset), bond_dim, bond_dim);
+ new (&partial_contract2)
+ MatrixMap((Complex*)(scratch_raw + end), 2, bond_dim);
+
+ partial_contract2.noalias() = bot * partial_contract.adjoint();
+
+ new (&partial_contract) MatrixMap((Complex*)rdm, 2, 2);
+ partial_contract.noalias() = top * partial_contract2.adjoint();
+ auto p0 = rdm[0] / (rdm[0] + rdm[6]);
+ std::bernoulli_distribution distribution(1 - p0);
+ auto bit_val = distribution(*random_gen);
+ sample->push_back(bit_val);
+
+ // collapse state.
+ new (&partial_contract) MatrixMap((Complex*)scratch_raw, 2, bond_dim);
+ partial_contract.row(!bit_val).setZero();
+
+ // Prepare left contraction frontier.
+ new (&partial_contract2) MatrixMap(
+ (Complex*)(scratch2_raw + left_frontier_offset), bond_dim, bond_dim);
+ partial_contract2.noalias() =
+ partial_contract.transpose() * partial_contract.conjugate();
+
+ // Compute RDM-i and draw internal tensor samples.
+ for (unsigned i = 1; i < num_qubits - 1; i++) {
+ // Get leftmost [bd, bd] contraction and contract with top.
+ offset = GetBlockOffset(state, i);
+ new (&partial_contract) MatrixMap(
+ (Complex*)(scratch2_raw + left_frontier_offset), bond_dim, bond_dim);
+ new (&top) ConstMatrixMap((Complex*)(state_raw + offset), bond_dim,
+ 2 * bond_dim);
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw + end), bond_dim, 2 * bond_dim);
+ partial_contract2.noalias() = partial_contract * top.conjugate();
+
+ // Contract top again for correct shape.
+ MatrixMap partial_contract3((Complex*)(scratch_raw + end), 2 * bond_dim,
+ 2 * bond_dim);
+ partial_contract3.noalias() = top.transpose() * partial_contract2;
+
+ // Conduct final tensor contraction operations. Cannot be easily compiled
+ // to matmul. Perf reports shows only ~6% of runtime spent here on large
+ // systems.
+ offset = GetBlockOffset(state, i + 1);
+ const Eigen::TensorMap>
+ t_4d((Complex*)(scratch_raw + end), 2, bond_dim, 2, bond_dim);
+ const Eigen::TensorMap>
+ t_2d((Complex*)(scratch2_raw + offset), bond_dim, bond_dim);
+
+ const Eigen::array, 2> product_dims = {
+ Eigen::IndexPair(1, 0),
+ Eigen::IndexPair(3, 1),
+ };
+ Eigen::TensorMap> out(
+ (Complex*)rdm, 2, 2);
+ out = t_4d.contract(t_2d, product_dims);
+
+ // Sample bit and collapse state.
+ p0 = rdm[0] / (rdm[0] + rdm[6]);
+ distribution = std::bernoulli_distribution(1 - p0);
+ bit_val = distribution(*random_gen);
+
+ sample->push_back(bit_val);
+ offset = GetBlockOffset(state, i);
+ new (&partial_contract)
+ MatrixMap((Complex*)(scratch_raw + offset), bond_dim * 2, bond_dim);
+ for (unsigned j = !bit_val; j < 2 * bond_dim; j += 2) {
+ partial_contract.row(j).setZero();
+ }
+
+ // Update left frontier.
+ new (&partial_contract) MatrixMap(
+ (Complex*)(scratch2_raw + left_frontier_offset), bond_dim, bond_dim);
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw + end), bond_dim, 2 * bond_dim);
+
+ // Merge bot into left boundary merged tensor.
+ new (&bot) ConstMatrixMap((Complex*)(scratch_raw + offset), bond_dim,
+ 2 * bond_dim);
+ partial_contract2.noalias() = partial_contract * bot.conjugate();
+
+ // reshape:
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw + end), 2 * bond_dim, bond_dim);
+
+ // Merge top into partial_contract2.
+ new (&top) ConstMatrixMap((Complex*)(scratch_raw + offset), 2 * bond_dim,
+ bond_dim);
+ partial_contract.noalias() = top.transpose() * partial_contract2;
+ }
+
+ // Compute RDM-(n-1) and sample.
+ offset = GetBlockOffset(state, num_qubits - 1);
+ new (&partial_contract2)
+ MatrixMap((Complex*)(state_raw + end), bond_dim, 2);
+
+ new (&top) ConstMatrixMap((Complex*)(state_raw + offset), bond_dim, 2);
+ partial_contract2.noalias() = partial_contract * top.conjugate();
+ new (&partial_contract) MatrixMap((Complex*)rdm, 2, 2);
+ partial_contract.noalias() = top.transpose() * partial_contract2;
+
+ p0 = rdm[0] / (rdm[0] + rdm[6]);
+ distribution = std::bernoulli_distribution(1 - p0);
+ bit_val = distribution(*random_gen);
+ sample->push_back(bit_val);
+ }
+
+ // Draw num_samples bitstring samples from state and store the result
+ // bit vectors in results. Uses scratch and scratch2 as workspace.
+ static void Sample(MPS& state, MPS& scratch, MPS& scratch2,
+ unsigned num_samples, unsigned seed,
+ std::vector>* results) {
+ std::mt19937 rand_source(seed);
+ results->reserve(num_samples);
+ for (unsigned i = 0; i < num_samples; i++) {
+ SampleOnce(state, scratch, scratch2, &rand_source, &(*results)[i]);
+ }
+ }
+
// Testing only. Convert the MPS to a wavefunction under "normal" ordering.
// Requires: wf be allocated beforehand with bond_dim * 2 ^ num_qubits -1
// memory.
diff --git a/lib/qtrajectory.h b/lib/qtrajectory.h
index bba4708c..1da6692f 100644
--- a/lib/qtrajectory.h
+++ b/lib/qtrajectory.h
@@ -16,6 +16,7 @@
#define QTRAJECTORY_H_
#include
+#include
#include
#include
#include
@@ -55,6 +56,37 @@ class QuantumTrajectorySimulator {
* If true, normalize the state vector before performing measurements.
*/
bool normalize_before_mea_gates = true;
+ /**
+ * If false, do not apply deferred operators after the main loop for
+ * the "primary" noise trajectory, that is the trajectory in which
+ * the primary (the first operators in their respective channels) Kraus
+ * operators are sampled for each channel and there are no measurements
+ * in the computational basis. This can be used to speed up simulations
+ * of circuits with weak noise and without measurements by reusing
+ * the primary trajectory results. There is an additional condition for
+ * RunBatch. In this case, the deferred operators after the main loop are
+ * still applied for the first occurence of the primary trajectory.
+ * The primary Kraus operators should have the highest sampling
+ * probabilities to achieve the highest speedup.
+ *
+ * It is the client's responsibility to collect the primary trajectory
+ * results and to reuse them.
+ */
+ bool apply_last_deferred_ops = true;
+ };
+
+ /**
+ * Struct with statistics to populate by RunBatch and RunOnce methods.
+ */
+ struct Stat {
+ /**
+ * Indices of sampled Kraus operator indices and/or measured bitstrings.
+ */
+ std::vector samples;
+ /**
+ * True if the "primary" noise trajectory is sampled, false otherwise.
+ */
+ bool primary;
};
/**
@@ -70,8 +102,7 @@ class QuantumTrajectorySimulator {
* computing expectation values, etc). This function should have three
* required parameters [repetition ID (uint64_t), final state vector
* (const State&), statistics of sampled Kraus operator indices and/or
- * measured bitstrings (const std::vector&)] and any number of
- * optional parameters.
+ * measured bitstrings (const Stat&)] and any number of optional parameters.
* @param args Optional arguments for the 'measure' function.
* @return True if the simulation completed successfully; false otherwise.
*/
@@ -100,8 +131,7 @@ class QuantumTrajectorySimulator {
* computing expectation values, etc). This function should have three
* required parameters [repetition ID (uint64_t), final state vector
* (const State&), statistics of sampled Kraus operator indices and/or
- * measured bitstrings (const std::vector&)] and any number of
- * optional parameters.
+ * measured bitstrings (const Stat&)] and any number of optional parameters.
* @param args Optional arguments for the 'measure' function.
* @return True if the simulation completed successfully; false otherwise.
*/
@@ -116,20 +146,27 @@ class QuantumTrajectorySimulator {
gates.reserve(4 * std::size_t(cend - cbeg));
State state = state_space.Null();
- State scratch = state_space.Null();
- std::vector stat;
+ Stat stat;
+ bool had_primary_realization = false;
for (uint64_t r = r0; r < r1; ++r) {
if (!state_space.IsNull(state)) {
state_space.SetStateZero(state);
}
- if (!RunIteration(param, num_qubits, cbeg, cend, r,
- state_space, simulator, gates, scratch, state, stat)) {
+ bool apply_last_deferred_ops =
+ param.apply_last_deferred_ops || !had_primary_realization;
+
+ if (!RunIteration(param, apply_last_deferred_ops, num_qubits, cbeg, cend,
+ r, state_space, simulator, gates, state, stat)) {
return false;
}
+ if (stat.primary && !had_primary_realization) {
+ had_primary_realization = true;
+ }
+
measure(r, state, stat, args...);
}
@@ -144,7 +181,6 @@ class QuantumTrajectorySimulator {
* @param state_space StateSpace object required to manipulate state vector.
* @param simulator Simulator object. Provides specific implementations for
* applying gates.
- * @param scratch A temporary state vector. Used for samping Kraus operators.
* @param state The state of the system, to be updated by this method.
* @param stat Statistics of sampled Kraus operator indices and/or measured
* bitstrings, to be populated by this method.
@@ -153,11 +189,10 @@ class QuantumTrajectorySimulator {
static bool RunOnce(const Parameter& param,
const NoisyCircuit& circuit, uint64_t r,
const StateSpace& state_space, const Simulator& simulator,
- State& scratch, State& state,
- std::vector& stat) {
+ State& state, Stat& stat) {
return RunOnce(param, circuit.num_qubits, circuit.channels.begin(),
circuit.channels.end(), r, state_space, simulator,
- scratch, state, stat);
+ state, stat);
}
/**
@@ -170,7 +205,6 @@ class QuantumTrajectorySimulator {
* @param state_space StateSpace object required to manipulate state vector.
* @param simulator Simulator object. Provides specific implementations for
* applying gates.
- * @param scratch A temporary state vector. Used for samping Kraus operators.
* @param state The state of the system, to be updated by this method.
* @param stat Statistics of sampled Kraus operator indices and/or measured
* bitstrings, to be populated by this method.
@@ -180,13 +214,12 @@ class QuantumTrajectorySimulator {
ncircuit_iterator cbeg,
ncircuit_iterator cend,
uint64_t r, const StateSpace& state_space,
- const Simulator& simulator, State& scratch, State& state,
- std::vector& stat) {
+ const Simulator& simulator, State& state, Stat& stat) {
std::vector gates;
gates.reserve(4 * std::size_t(cend - cbeg));
- if (!RunIteration(param, num_qubits, cbeg, cend, r,
- state_space, simulator, gates, scratch, state, stat)) {
+ if (!RunIteration(param, param.apply_last_deferred_ops, num_qubits, cbeg,
+ cend, r, state_space, simulator, gates, state, stat)) {
return false;
}
@@ -194,16 +227,17 @@ class QuantumTrajectorySimulator {
}
private:
- static bool RunIteration(const Parameter& param, unsigned num_qubits,
+ static bool RunIteration(const Parameter& param,
+ bool apply_last_deferred_ops, unsigned num_qubits,
ncircuit_iterator cbeg,
ncircuit_iterator cend,
uint64_t rep, const StateSpace& state_space,
const Simulator& simulator,
- std::vector& gates, State& scratch,
- State& state, std::vector& stat) {
+ std::vector& gates,
+ State& state, Stat& stat) {
if (param.collect_kop_stat || param.collect_mea_stat) {
- stat.reserve(std::size_t(cend - cbeg));
- stat.resize(0);
+ stat.samples.reserve(std::size_t(cend - cbeg));
+ stat.samples.resize(0);
}
if (state_space.IsNull(state)) {
@@ -216,12 +250,12 @@ class QuantumTrajectorySimulator {
}
gates.resize(0);
- stat.resize(0);
RGen rgen(rep);
std::uniform_real_distribution distr(0.0, 1.0);
bool unitary = true;
+ stat.primary = true;
for (auto it = cbeg; it != cend; ++it) {
const auto& channel = *it;
@@ -247,6 +281,8 @@ class QuantumTrajectorySimulator {
CollectStat(param.collect_mea_stat, mresult.bits, stat);
+ stat.primary = false;
+
continue;
}
@@ -279,14 +315,8 @@ class QuantumTrajectorySimulator {
NormalizeState(!unitary, state_space, unitary, state);
- if (state_space.IsNull(scratch)) {
- scratch = CreateState(num_qubits, state_space);
- if (state_space.IsNull(scratch)) {
- return false;
- }
- }
-
- state_space.Copy(state, scratch);
+ double max_prob = 0;
+ std::size_t max_prob_index = 0;
// Perform sampling of Kraus operators using norms of updated states.
for (std::size_t i = 0; i < channel.size(); ++i) {
@@ -294,43 +324,39 @@ class QuantumTrajectorySimulator {
if (kop.unitary) continue;
- // Apply the Kraus operator.
- if (kop.ops.size() == 1) {
- ApplyGate(simulator, kop.ops[0], state);
- } else {
- DeferOps(kop.ops, gates);
+ double prob = std::real(
+ simulator.ExpectationValue(kop.qubits, kop.kd_k.data(), state));
- if (!ApplyDeferredOps(param, num_qubits, simulator, gates, state)) {
- return false;
- }
+ if (prob > max_prob) {
+ max_prob = prob;
+ max_prob_index = i;
}
- double n2 = state_space.Norm(state);
-
- cp += n2 - kop.prob;
+ cp += prob - kop.prob;
if (r < cp || i == channel.size() - 1) {
// Sample ith Kraus operator if r < cp
- // Sample the first Kraus operator if r is greater than the sum of
- // all probablities due to round-off errors.
- uint64_t k = r < cp ? i : 0;
+ // Sample the highest probability Kraus operator if r is greater
+ // than the sum of all probablities due to round-off errors.
+ uint64_t k = r < cp ? i : max_prob_index;
+ DeferOps(channel[k].ops, gates);
CollectStat(param.collect_kop_stat, k, stat);
unitary = false;
break;
}
-
- state_space.Copy(scratch, state);
}
}
- if (!ApplyDeferredOps(param, num_qubits, simulator, gates, state)) {
- return false;
- }
+ if (apply_last_deferred_ops || !stat.primary) {
+ if (!ApplyDeferredOps(param, num_qubits, simulator, gates, state)) {
+ return false;
+ }
- NormalizeState(!unitary, state_space, unitary, state);
+ NormalizeState(!unitary, state_space, unitary, state);
+ }
return true;
}
@@ -384,10 +410,13 @@ class QuantumTrajectorySimulator {
}
}
- static void CollectStat(bool collect_stat, uint64_t i,
- std::vector& stat) {
+ static void CollectStat(bool collect_stat, uint64_t i, Stat& stat) {
if (collect_stat) {
- stat.push_back(i);
+ stat.samples.push_back(i);
+ }
+
+ if (i != 0) {
+ stat.primary = false;
}
}
diff --git a/lib/run_qsim.h b/lib/run_qsim.h
index bbc401b6..b0aad9f3 100644
--- a/lib/run_qsim.h
+++ b/lib/run_qsim.h
@@ -79,7 +79,7 @@ struct QSimRunner final {
double t0 = 0.0;
double t1 = 0.0;
- if (param.verbosity > 0) {
+ if (param.verbosity > 1) {
t0 = GetTime();
}
@@ -96,17 +96,33 @@ struct QSimRunner final {
state_space.SetStateZero(state);
Simulator simulator = factory.CreateSimulator();
+ if (param.verbosity > 1) {
+ t1 = GetTime();
+ IO::messagef("init time is %g seconds.\n", t1 - t0);
+ t0 = GetTime();
+ }
+
auto fused_gates = Fuser::FuseGates(param, circuit.num_qubits,
circuit.gates, times_to_measure_at);
+
if (fused_gates.size() == 0 && circuit.gates.size() > 0) {
return false;
}
+ if (param.verbosity > 1) {
+ t1 = GetTime();
+ IO::messagef("fuse time is %g seconds.\n", t1 - t0);
+ }
+
+ if (param.verbosity > 0) {
+ t0 = GetTime();
+ }
+
unsigned cur_time_index = 0;
// Apply fused gates.
for (std::size_t i = 0; i < fused_gates.size(); ++i) {
- if (param.verbosity > 1) {
+ if (param.verbosity > 3) {
t1 = GetTime();
}
@@ -116,7 +132,7 @@ struct QSimRunner final {
return false;
}
- if (param.verbosity > 1) {
+ if (param.verbosity > 3) {
double t2 = GetTime();
IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1);
}
@@ -132,7 +148,7 @@ struct QSimRunner final {
if (param.verbosity > 0) {
double t2 = GetTime();
- IO::messagef("time elapsed %g seconds.\n", t2 - t0);
+ IO::messagef("time is %g seconds.\n", t2 - t0);
}
return true;
@@ -159,7 +175,7 @@ struct QSimRunner final {
double t0 = 0.0;
double t1 = 0.0;
- if (param.verbosity > 0) {
+ if (param.verbosity > 1) {
t0 = GetTime();
}
@@ -168,16 +184,33 @@ struct QSimRunner final {
StateSpace state_space = factory.CreateStateSpace();
Simulator simulator = factory.CreateSimulator();
+ if (param.verbosity > 1) {
+ t1 = GetTime();
+ IO::messagef("init time is %g seconds.\n", t1 - t0);
+ t0 = GetTime();
+ }
+
auto fused_gates = Fuser::FuseGates(param, circuit.num_qubits,
circuit.gates);
+
if (fused_gates.size() == 0 && circuit.gates.size() > 0) {
return false;
}
+
measure_results.reserve(fused_gates.size());
+ if (param.verbosity > 1) {
+ t1 = GetTime();
+ IO::messagef("fuse time is %g seconds.\n", t1 - t0);
+ }
+
+ if (param.verbosity > 0) {
+ t0 = GetTime();
+ }
+
// Apply fused gates.
for (std::size_t i = 0; i < fused_gates.size(); ++i) {
- if (param.verbosity > 1) {
+ if (param.verbosity > 3) {
t1 = GetTime();
}
@@ -187,7 +220,7 @@ struct QSimRunner final {
return false;
}
- if (param.verbosity > 1) {
+ if (param.verbosity > 3) {
double t2 = GetTime();
IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1);
}
@@ -195,7 +228,7 @@ struct QSimRunner final {
if (param.verbosity > 0) {
double t2 = GetTime();
- IO::messagef("time elapsed %g seconds.\n", t2 - t0);
+ IO::messagef("simu time is %g seconds.\n", t2 - t0);
}
return true;
diff --git a/lib/simulator.h b/lib/simulator.h
new file mode 100644
index 00000000..d5af3c2a
--- /dev/null
+++ b/lib/simulator.h
@@ -0,0 +1,511 @@
+// Copyright 2019 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SIMULATOR_H_
+#define SIMULATOR_H_
+
+#include
+
+#include "bits.h"
+
+namespace qsim {
+
+/**
+ * Base class for simulator classes.
+ */
+class SimulatorBase {
+ protected:
+ // The follwoing template parameters are used for functions below.
+ // H - the number of high (target) qubits.
+ // L - the number of low (target) qubits.
+ // R - SIMD register width in floats.
+
+ // Fills the table of masks (ms) that is used to calculate base state indices
+ // and the table of offset indices (xss) that is used to access the state
+ // vector entries in matrix-vector multiplication functions. This function is
+ // used in simulator_basic.h, simulator_sse.h and simulator_avx.h (no bmi2
+ // version).
+ template
+ static void FillIndices(unsigned num_qubits, const std::vector& qs,
+ uint64_t* ms, uint64_t* xss) {
+ constexpr unsigned hsize = 1 << H;
+
+ uint64_t xs[H];
+
+ xs[0] = uint64_t{1} << (qs[L] + 1);
+ ms[0] = (uint64_t{1} << qs[L]) - 1;
+ for (unsigned i = 1; i < H; ++i) {
+ xs[i] = uint64_t{1} << (qs[L + i] + 1);
+ ms[i] = ((uint64_t{1} << qs[L + i]) - 1) ^ (xs[i - 1] - 1);
+ }
+ ms[H] = ((uint64_t{1} << num_qubits) - 1) ^ (xs[H - 1] - 1);
+
+ for (unsigned i = 0; i < hsize; ++i) {
+ uint64_t a = 0;
+ for (uint64_t k = 0; k < H; ++k) {
+ a += xs[k] * ((i >> k) & 1);
+ }
+ xss[i] = a;
+ }
+ }
+
+ // Fills gate matrix entries for gates with low qubits.
+ template
+ static void FillMatrix(unsigned qmaskl, const fp_type* matrix, fp_type* w) {
+ constexpr unsigned gsize = 1 << (H + L);
+ constexpr unsigned hsize = 1 << H;
+ constexpr unsigned lsize = 1 << L;
+ constexpr unsigned rsize = 1 << R;
+
+ unsigned s = 0;
+
+ for (unsigned i = 0; i < hsize; ++i) {
+ for (unsigned j = 0; j < gsize; ++j) {
+ unsigned p0 = 2 * i * lsize * gsize + 2 * lsize * (j / lsize);
+
+ for (unsigned k = 0; k < rsize; ++k) {
+ unsigned l = bits::CompressBits(k, R, qmaskl);
+ unsigned p = p0 + 2 * (gsize * l + (j + l) % lsize);
+
+ w[s + 0] = matrix[p];
+ w[s + rsize] = matrix[p + 1];
+
+ ++s;
+ }
+
+ s += rsize;
+ }
+ }
+ }
+
+ // Fills gate matrix entries for controlled gates with high target qubits
+ // and low control qubits.
+ template
+ static void FillControlledMatrixH(uint64_t cvalsl, uint64_t cmaskl,
+ const fp_type* matrix, fp_type* w) {
+ constexpr unsigned hsize = 1 << H;
+ constexpr unsigned rsize = 1 << R;
+
+ unsigned s = 0;
+
+ for (unsigned i = 0; i < hsize; ++i) {
+ for (unsigned j = 0; j < hsize; ++j) {
+ unsigned p = hsize * i + j;
+ fp_type v = i == j ? 1 : 0;
+
+ for (unsigned k = 0; k < rsize; ++k) {
+ w[s] = cvalsl == (k & cmaskl) ? matrix[2 * p] : v;
+ w[s + rsize] = cvalsl == (k & cmaskl) ? matrix[2 * p + 1] : 0;
+
+ ++s;
+ }
+
+ s += rsize;
+ }
+ }
+ }
+
+ // Fills gate matrix entries for controlled gates with low target qubits
+ // and low control qubits.
+ template
+ static void FillControlledMatrixL(uint64_t cvalsl, uint64_t cmaskl,
+ unsigned qmaskl, const fp_type* matrix,
+ fp_type* w) {
+ constexpr unsigned gsize = 1 << (H + L);
+ constexpr unsigned hsize = 1 << H;
+ constexpr unsigned lsize = 1 << L;
+ constexpr unsigned rsize = 1 << R;
+
+ unsigned s = 0;
+
+ for (unsigned i = 0; i < hsize; ++i) {
+ for (unsigned j = 0; j < gsize; ++j) {
+ unsigned p0 = i * lsize * gsize + lsize * (j / lsize);
+
+ for (unsigned k = 0; k < rsize; ++k) {
+ unsigned l = bits::CompressBits(k, R, qmaskl);
+ unsigned p = p0 + gsize * l + (j + l) % lsize;
+
+ fp_type v = p / gsize == p % gsize ? 1 : 0;
+
+ w[s] = cvalsl == (k & cmaskl) ? matrix[2 * p] : v;
+ w[s + rsize] = cvalsl == (k & cmaskl) ? matrix[2 * p + 1] : 0;
+
+ ++s;
+ }
+
+ s += rsize;
+ }
+ }
+ }
+
+/*
+ The GetMasks* functions below provide various masks and related values.
+ GetMasks1, GetMasks2, GetMasks3, GetMasks4, GetMasks5 and GetMasks6 are
+ used in simulator_avx.h (BMI2 version) and in simulator_avx512.h. GetMasks7,
+ GetMasks8, GetMasks9, GetMasks10 and GetMasks11 are used in simulator_avx.h
+ (no BMI2 version) and in simulator_sse.h.
+
+ imaskh - inverted mask of high qubits (high control and target qubits).
+ qmaskh - mask of high qubits (high target qubits).
+ cvalsh - control bit values of high control qubits placed in correct
+ positions.
+ cvalsl - control bit values of low control qubits placed in correct positions.
+ cmaskh - mask of high control qubits.
+ cmaskl - mask of low control qubits.
+ qmaskl - mask of low qubits (low target qubits).
+ cl - the number of low control qubits.
+
+ Note that imaskh, qmaskh and cvalsh are multiplied by two in GetMasks1,
+ GetMasks2, GetMasks3, GetMasks4, GetMasks5 and GetMasks6.
+*/
+
+ struct Masks1 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ };
+
+ template
+ static Masks1 GetMasks1(const std::vector& qs) {
+ uint64_t qmaskh = 0;
+
+ for (unsigned i = 0; i < H; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ return {2 * (~qmaskh ^ ((1 << R) - 1)), 2 * qmaskh};
+ }
+
+ struct Masks2 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ unsigned qmaskl;
+ };
+
+ template
+ static Masks2 GetMasks2(const std::vector& qs) {
+ uint64_t qmaskh = 0;
+ unsigned qmaskl = 0;
+
+ for (unsigned i = 0; i < L; ++i) {
+ qmaskl |= 1 << qs[i];
+ }
+
+ for (unsigned i = L; i < H + L; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ return {2 * (~qmaskh ^ ((1 << R) - 1)), 2 * qmaskh, qmaskl};
+ }
+
+ struct Masks3 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ uint64_t cvalsh;
+ };
+
+ template
+ static Masks3 GetMasks3(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ uint64_t qmaskh = 0;
+ uint64_t cmaskh = 0;
+
+ for (unsigned i = 0; i < H; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ for (auto q : cqs) {
+ cmaskh |= uint64_t{1} << q;
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals, num_qubits, cmaskh);
+
+ uint64_t maskh = ~(qmaskh | cmaskh) ^ ((1 << R) - 1);
+
+ return {2 * maskh, 2 * qmaskh, 2 * cvalsh};
+ }
+
+ struct Masks4 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ uint64_t cvalsh;
+ uint64_t cvalsl;
+ uint64_t cmaskl;
+ unsigned cl;
+ };
+
+ template
+ static Masks4 GetMasks4(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ unsigned cl = 0;
+ uint64_t qmaskh = 0;
+ uint64_t cmaskh = 0;
+ uint64_t cmaskl = 0;
+
+ for (unsigned i = 0; i < H; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ for (auto q : cqs) {
+ if (q >= R) {
+ cmaskh |= uint64_t{1} << q;
+ } else {
+ ++cl;
+ cmaskl |= uint64_t{1} << q;
+ }
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals >> cl, num_qubits, cmaskh);
+ uint64_t cvalsl = bits::ExpandBits(cvals & ((1 << cl) - 1), R, cmaskl);
+
+ uint64_t maskh = ~(qmaskh | cmaskh) ^ ((1 << R) - 1);
+
+ return {2 * maskh, 2 * qmaskh, 2 * cvalsh, cvalsl, cmaskl, cl};
+ }
+
+ struct Masks5 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ uint64_t cvalsh;
+ unsigned qmaskl;
+ };
+
+ template
+ static Masks5 GetMasks5(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ uint64_t qmaskh = 0;
+ uint64_t cmaskh = 0;
+ unsigned qmaskl = 0;
+
+ for (unsigned i = 0; i < L; ++i) {
+ qmaskl |= 1 << qs[i];
+ }
+
+ for (unsigned i = L; i < H + L; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ for (auto q : cqs) {
+ cmaskh |= uint64_t{1} << q;
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals, num_qubits, cmaskh);
+
+ uint64_t maskh = ~(qmaskh | cmaskh) ^ ((1 << R) - 1);
+
+ return {2 * maskh, 2 * qmaskh, 2 * cvalsh, qmaskl};
+ }
+
+ struct Masks6 {
+ uint64_t imaskh;
+ uint64_t qmaskh;
+ uint64_t cvalsh;
+ uint64_t cvalsl;
+ uint64_t cmaskl;
+ unsigned qmaskl;
+ unsigned cl;
+ };
+
+ template
+ static Masks6 GetMasks6(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ unsigned cl = 0;
+ uint64_t qmaskh = 0;
+ uint64_t cmaskh = 0;
+ uint64_t cmaskl = 0;
+ unsigned qmaskl = 0;
+
+ for (unsigned i = 0; i < L; ++i) {
+ qmaskl |= 1 << qs[i];
+ }
+
+ for (unsigned i = L; i < H + L; ++i) {
+ qmaskh |= uint64_t{1} << qs[i];
+ }
+
+ for (auto q : cqs) {
+ if (q >= R) {
+ cmaskh |= uint64_t{1} << q;
+ } else {
+ ++cl;
+ cmaskl |= uint64_t{1} << q;
+ }
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals >> cl, num_qubits, cmaskh);
+ uint64_t cvalsl = bits::ExpandBits(cvals & ((1 << cl) - 1), R, cmaskl);
+
+ uint64_t maskh = ~(qmaskh | cmaskh) ^ ((1 << R) - 1);
+
+ return {2 * maskh, 2 * qmaskh, 2 * cvalsh, cvalsl, cmaskl, qmaskl, cl};
+ }
+
+ struct Masks7 {
+ uint64_t cvalsh;
+ uint64_t cmaskh;
+ };
+
+ static Masks7 GetMasks7(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ uint64_t cmaskh = 0;
+
+ for (auto q : cqs) {
+ cmaskh |= uint64_t{1} << q;
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals, num_qubits, cmaskh);
+
+ return {cvalsh, cmaskh};
+ }
+
+ struct Masks8 {
+ uint64_t cvalsh;
+ uint64_t cmaskh;
+ uint64_t cvalsl;
+ uint64_t cmaskl;
+ };
+
+ template
+ static Masks8 GetMasks8(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ unsigned cl = 0;
+ uint64_t cmaskh = 0;
+ uint64_t cmaskl = 0;
+
+ for (auto q : cqs) {
+ if (q >= R) {
+ cmaskh |= uint64_t{1} << q;
+ } else {
+ ++cl;
+ cmaskl |= uint64_t{1} << q;
+ }
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals >> cl, num_qubits, cmaskh);
+ uint64_t cvalsl = bits::ExpandBits(cvals & ((1 << cl) - 1), R, cmaskl);
+
+ return {cvalsh, cmaskh, cvalsl, cmaskl};
+ }
+
+ struct Masks9 {
+ uint64_t cvalsh;
+ uint64_t cmaskh;
+ unsigned qmaskl;
+ };
+
+ template
+ static Masks9 GetMasks9(unsigned num_qubits, const std::vector& qs,
+ const std::vector& cqs, uint64_t cvals) {
+ uint64_t cmaskh = 0;
+ unsigned qmaskl = 0;
+
+ for (unsigned i = 0; i < L; ++i) {
+ qmaskl |= 1 << qs[i];
+ }
+
+ for (auto q : cqs) {
+ cmaskh |= uint64_t{1} << q;
+ }
+
+ uint64_t cvalsh = bits::ExpandBits(cvals, num_qubits, cmaskh);
+
+ return {cvalsh, cmaskh, qmaskl};
+ }
+
+ struct Masks10 {
+ uint64_t cvalsh;
+ uint64_t cmaskh;
+ uint64_t cvalsl;
+ uint64_t cmaskl;
+ unsigned qmaskl;
+ };
+
+ template
+ static Masks10 GetMasks10(unsigned num_qubits,
+ const std::vector& qs,
+ const std::vector