From 44a18964a62652d853a494ded0b992e33d349ccd Mon Sep 17 00:00:00 2001 From: Anthony Micciche <32377705+amicciche@users.noreply.github.com> Date: Thu, 31 Aug 2023 12:04:46 -0400 Subject: [PATCH 1/5] shor syndrome, classic xor, and pauli frame x measurement (#170) --------- Co-authored-by: Stefan Krastanov --- CHANGELOG.md | 6 + Project.toml | 2 +- .../QuantumCliffordQuantikzExt.jl | 1 + src/QuantumClifford.jl | 2 +- src/ecc/ECC.jl | 115 +++++++++++++++++- src/misc_ops.jl | 9 ++ src/pauli_frames.jl | 25 +++- test/runtests.jl | 1 + test/test_ecc.jl | 4 +- test/test_ecc_syndromes.jl | 54 ++++++++ test/test_jet.jl | 2 +- 11 files changed, 208 insertions(+), 13 deletions(-) create mode 100644 test/test_ecc_syndromes.jl diff --git a/CHANGELOG.md b/CHANGELOG.md index 47b8e2458..f69abf7b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,15 @@ - `permute` will be a wrapper around to `QuantumInterface.permutesubsystems`. Documentation for `permute!` would be similarly updated - reworking the rest of `NoisyCircuits` and moving it out of `Experimental` +- `random_pauli(...; realphase=true)` should be the default # News +## v0.8.16 - 2023-08-31 + +- **(breaking)** Changes to the circuit generation in the non-public ECC module. Adding a Shor syndrome measurement circuit. +- Added a `ClassicalXOR` gate, for now supporting only `PauliFrame`s. + ## v0.8.15 - 2023-08-16 - Initial support for GPU accelerated circuit simulation (with CUDA). diff --git a/Project.toml b/Project.toml index 68d25a160..47542bfa0 100644 --- a/Project.toml +++ b/Project.toml @@ -47,7 +47,7 @@ Makie = "0.19.7" Nemo = "0.34, 0.35" Plots = "1.38.0" PrecompileTools = "1" -Quantikz = "1.3" +Quantikz = "1.3.1" QuantumInterface = "0.3.0" QuantumOpticsBase = "0.4" SIMD = "3.4.0" diff --git a/ext/QuantumCliffordQuantikzExt/QuantumCliffordQuantikzExt.jl b/ext/QuantumCliffordQuantikzExt/QuantumCliffordQuantikzExt.jl index b94627cf0..04aae7e66 100644 --- a/ext/QuantumCliffordQuantikzExt/QuantumCliffordQuantikzExt.jl +++ b/ext/QuantumCliffordQuantikzExt/QuantumCliffordQuantikzExt.jl @@ -72,6 +72,7 @@ function Quantikz.QuantikzOp(op::Reset) # TODO This is complicated because quant end Quantikz.QuantikzOp(op::NoiseOp) = Quantikz.Noise(op.indices) Quantikz.QuantikzOp(op::NoiseOpAll) = Quantikz.NoiseAll() +Quantikz.QuantikzOp(op::ClassicalXOR) = Quantikz.ClassicalDecision(sort([op.store, op.bits...])) function lstring(pauli::PauliOperator) v = join(("\\mathtt{$(o)}" for o in replace(string(pauli)[3:end],"_"=>"I")),"\\\\") diff --git a/src/QuantumClifford.jl b/src/QuantumClifford.jl index 6cbfb56c9..6c77b1106 100644 --- a/src/QuantumClifford.jl +++ b/src/QuantumClifford.jl @@ -53,7 +53,7 @@ export # Misc Ops SparseGate, sMX, sMY, sMZ, PauliMeasurement, Reset, sMRX, sMRY, sMRZ, - BellMeasurement, + BellMeasurement, ClassicalXOR, VerifyOp, Register, # Enumeration and Randoms diff --git a/src/ecc/ECC.jl b/src/ecc/ECC.jl index 5eefd1b46..0410b5a60 100644 --- a/src/ecc/ECC.jl +++ b/src/ecc/ECC.jl @@ -44,6 +44,10 @@ end """Generate the non-fault-tolerant stabilizer measurement cicuit for a given code instance or parity check tableau. Use the `ancillary_index` and `bit_index` arguments to offset where the corresponding part the circuit starts. + +Returns the circuit, the number of ancillary qubits that were added, and a list of bit indices that will store the measurement results. + +See also: [`shor_syndrome_circuit`](@ref) """ function naive_syndrome_circuit end @@ -51,8 +55,15 @@ function naive_syndrome_circuit(code_type::AbstractECC, ancillary_index=1, bit_i naive_syndrome_circuit(parity_checks(code_type), ancillary_index, bit_index) end -"""Circuit that measures the corresponding PauliOperator by using conditional gates into an ancillary -qubit at index `nqubits(p)+ancillary_index` and stores the measurement result into classical bit `bit_index`.""" +"""Naive Pauli measurement circuit using a single ancillary qubit. + +Not a fault-tolerant circuit, but useful for testing purposes. + +Measures the corresponding `PauliOperator` by using conditional gates into an ancillary +qubit at index `nqubits(p)+ancillary_index` and stores the measurement result +into classical bit `bit_index`. + +See also: [`naive_syndrome_circuit`](@ref), [`shor_ancillary_paulimeasurement`](@ref)""" function naive_ancillary_paulimeasurement(p::PauliOperator, ancillary_index=1, bit_index=1) circuit = AbstractOperation[] numQubits = nqubits(p) @@ -73,14 +84,106 @@ end function naive_syndrome_circuit(parity_check_tableau, ancillary_index=1, bit_index=1) naive_sc = AbstractOperation[] + ancillaries = 0 + bits = 0 + for check in parity_check_tableau + append!(naive_sc,naive_ancillary_paulimeasurement(check, ancillary_index+ancillaries, bit_index+bits)) + ancillaries +=1 + bits +=1 + end + + return naive_sc, ancillaries, bit_index:bit_index+bits-1 +end + +"""Generate the Shor fault-tolerant stabilizer measurement cicuit for a given code instance or parity check tableau. + +Use the `ancillary_index` and `bit_index` arguments to offset where the corresponding part the circuit starts. +Ancillary qubits + +Returns: + - The cat state preparation circuit. + - The Shor syndrome measurement circuit. + - The number of ancillary qubits that were added. + - The list of bit indices that store the final measurement results. + +See also: [`naive_syndrome_circuit`](@ref) +""" +function shor_syndrome_circuit end + +function shor_syndrome_circuit(code_type::AbstractECC, ancillary_index=1, bit_index=1) + shor_syndrome_circuit(parity_checks(code_type), ancillary_index, bit_index) +end + +"""Shor's Pauli measurement circuit using a multiple entangled ancillary qubits. + +A fault-tolerant circuit. + +Measures the corresponding PauliOperator by using conditional gates into multiple ancillary +entangled qubits starting at index `nqubits(p)+ancillary_index` +and stores the measurement result into classical bits starting at `bit_index`. +The final measurement result is the XOR of all the bits. + +Returns: + - The cat state preparation circuit. + - The Shor syndrome measurement circuit. + - One more than the index of the last added ancillary qubit. + - One more than the index of the last added classical bit. + +See also: [`naive_syndrome_circuit`](@ref), [`naive_ancillary_paulimeasurement`](@ref)""" +function shor_ancillary_paulimeasurement(p::PauliOperator, ancillary_index=1, bit_index=1) + init_ancil_index = ancillary_index + circuit = AbstractOperation[] + measurements = AbstractOperation[] + numQubits = nqubits(p) + for qubit in 1:numQubits + if p[qubit] == (1,0) + push!(circuit, sXCZ(qubit, numQubits + ancillary_index)) + elseif p[qubit] == (0,1) + push!(circuit, sZCZ(qubit, numQubits + ancillary_index)) + elseif p[qubit] == (1,1) + push!(circuit, sYCZ(qubit, numQubits + ancillary_index)) + end + if p[qubit] != (0,0) + push!(measurements, sMRX(numQubits + ancillary_index, bit_index)) + ancillary_index +=1 + bit_index +=1 + end + end + circuit = vcat(circuit,measurements) + + cat_state_circuit = AbstractOperation[] + push!(cat_state_circuit, sHadamard(numQubits + init_ancil_index)) + for i in (init_ancil_index+1):(ancillary_index -1) + push!(cat_state_circuit, sCNOT(numQubits + (i - 1), numQubits + i)) + end + + return cat_state_circuit, circuit, ancillary_index, bit_index +end + +function shor_syndrome_circuit(parity_check_tableau, ancillary_index=1, bit_index=1) + shor_sc = AbstractOperation[] + xor_indices = [] + cat_circuit = AbstractOperation[] + initial_ancillary_index = ancillary_index for check in parity_check_tableau - naive_sc = append!(naive_sc,naive_ancillary_paulimeasurement(check, ancillary_index, bit_index)) - ancillary_index +=1 - bit_index +=1 + cat_circ, circ, new_ancillary_index, new_bit_index = shor_ancillary_paulimeasurement(check, ancillary_index, bit_index) + push!(xor_indices, collect(bit_index:new_bit_index-1)) + + append!(shor_sc, circ) + append!(cat_circuit, cat_circ) + + ancillary_index = new_ancillary_index + bit_index = new_bit_index + end + + final_bits = 0 + for indices in xor_indices + push!(shor_sc, QuantumClifford.ClassicalXOR(indices, bit_index+final_bits)) + final_bits += 1 end - return naive_sc + return cat_circuit, shor_sc, ancillary_index-initial_ancillary_index, bit_index:bit_index+final_bits-1 end """The distance of a code.""" diff --git a/src/misc_ops.jl b/src/misc_ops.jl index 5361d21e7..af24b0395 100644 --- a/src/misc_ops.jl +++ b/src/misc_ops.jl @@ -126,3 +126,12 @@ function applywstatus!(s::AbstractQCState, v::VerifyOp) # XXX It assumes the oth end operatordeterminism(::Type{VerifyOp}) = DeterministicOperatorTrait() + +"""Applies an XOR gate to classical bits. Currently only implemented for funcitonality with pauli frames.""" +struct ClassicalXOR{N} <: AbstractOperation + "The indices of the classical bits to be xor-ed" + bits::NTuple{N,Int} + "The index of the classical bit that will store the results" + store::Int +end +ClassicalXOR(bits::Vector, store::Int) = ClassicalXOR(tuple(bits...),store) diff --git a/src/pauli_frames.jl b/src/pauli_frames.jl index ee0a1d1fb..71d5dc10b 100644 --- a/src/pauli_frames.jl +++ b/src/pauli_frames.jl @@ -57,7 +57,28 @@ function apply!(f::PauliFrame, op::AbstractCliffordOperator) return f end -function apply!(frame::PauliFrame, op::sMZ) # TODO sMX, sMY +function apply!(frame::PauliFrame, xor::ClassicalXOR) + for f in eachindex(frame) + value = frame.measurements[f,xor.bits[1]] + for i in xor.bits[2:end] + value ⊻= frame.measurements[f,i] + end + frame.measurements[f, xor.store] = value + end +end + +function apply!(frame::PauliFrame, op::sMX) # TODO implement a faster direct version + op.bit == 0 && return frame + apply!(frame, sHadamard(op.qubit)) + apply!(frame, sMZ(op.qubit, op.bit)) +end + +function apply!(frame::PauliFrame, op::sMRX) # TODO implement a faster direct version + apply!(frame, sHadamard(op.qubit)) + apply!(frame, sMRZ(op.qubit, op.bit)) +end + +function apply!(frame::PauliFrame, op::sMZ) # TODO sMY, and faster sMX op.bit == 0 && return frame i = op.qubit xzs = frame.frame.tab.xzs @@ -75,7 +96,7 @@ function apply!(frame::PauliFrame, op::sMZ) # TODO sMX, sMY return frame end -function apply!(frame::PauliFrame, op::sMRZ) # TODO sMRX, sMRY +function apply!(frame::PauliFrame, op::sMRZ) # TODO sMRY, and faster sMRX i = op.qubit xzs = frame.frame.tab.xzs T = eltype(xzs) diff --git a/test/runtests.jl b/test/runtests.jl index 3b7b5b4b2..a7099f8b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,6 +60,7 @@ end @doset "enumerate" @doset "quantumoptics" @doset "ecc" +@doset "ecc_syndromes" @doset "precompile" @doset "pauliframe" @doset "allocations" diff --git a/test/test_ecc.jl b/test/test_ecc.jl index eec808bca..5b7a16b9e 100644 --- a/test/test_ecc.jl +++ b/test/test_ecc.jl @@ -67,7 +67,7 @@ function test_naive_syndrome(c::AbstractECC) s1 = copy(logicalqubits) syndrome1 = [project!(s1, check)[3] for check in parity_checks(c)] # measure using `naive_syndrome_circuit` - naive_circuit = naive_syndrome_circuit(c) + naive_circuit, _ = naive_syndrome_circuit(c) ancillaryqubits = one(Stabilizer,code_s(c)) s2 = copy(logicalqubits) syndrome2 = Register(s2⊗ancillaryqubits, falses(code_s(c))) @@ -89,7 +89,7 @@ end function test_with_pframes(code) ecirc = encoding_circuit(code) - scirc = naive_syndrome_circuit(code) + scirc, _ = naive_syndrome_circuit(code) nframes = 10 dataqubits = code_n(code) ancqubits = code_s(code) diff --git a/test/test_ecc_syndromes.jl b/test/test_ecc_syndromes.jl new file mode 100644 index 000000000..851f95157 --- /dev/null +++ b/test/test_ecc_syndromes.jl @@ -0,0 +1,54 @@ +using Test +using QuantumClifford +using QuantumClifford: mul_left! +using QuantumClifford.ECC: AbstractECC, Steane7, Shor9, Bitflip3, naive_syndrome_circuit, encoding_circuit, shor_syndrome_circuit, code_n, code_s + +codes = [ + Bitflip3(), + Steane7(), + Shor9(), +] + +## + +function pframe_naive_vs_shor_syndrome(code) + ecirc = encoding_circuit(code) + naive_scirc, naive_ancillaries = naive_syndrome_circuit(code) + shor_cat_scirc, shor_scirc, shor_ancillaries, shor_bits = shor_syndrome_circuit(code) + nframes = 10 + dataqubits = code_n(code) + syndromebits = code_s(code) + naive_qubits = dataqubits + syndromebits + shor_qubits = dataqubits + shor_ancillaries + # no noise + naive_frames = PauliFrame(nframes, naive_qubits, syndromebits) + shor_frames = PauliFrame(nframes, shor_qubits, last(shor_bits)) + naive_circuit = [ecirc..., naive_scirc...] + shor_circuit = [ecirc..., shor_cat_scirc..., shor_scirc...] + pftrajectories(naive_frames, naive_circuit) + pftrajectories(shor_frames, shor_circuit) + @test pfmeasurements(naive_frames) == pfmeasurements(shor_frames)[:,shor_bits] + # with errors + for _ in 1:10 + naive_frames = PauliFrame(nframes, naive_qubits, syndromebits) + shor_frames = PauliFrame(nframes, shor_qubits, last(shor_bits)) + pftrajectories(naive_frames, ecirc) + pftrajectories(shor_frames, [ecirc..., shor_cat_scirc...]) + # manually injecting the same type of noise in the frames -- not really a user accessible API + p = random_pauli(dataqubits, realphase=true) + pn = embed(naive_qubits, 1:dataqubits, p) + ps = embed(shor_qubits, 1:dataqubits, p) + mul_left!(naive_frames.frame, pn) + mul_left!(shor_frames.frame, ps) + # run the syndrome circuits using the public API + pftrajectories(naive_frames, naive_scirc) + pftrajectories(shor_frames, shor_scirc) + @test pfmeasurements(naive_frames) == pfmeasurements(shor_frames)[:,shor_bits] + end +end + +@testset "naive and shor measurement circuits" begin + for c in codes + pframe_naive_vs_shor_syndrome(c) + end +end diff --git a/test/test_jet.jl b/test/test_jet.jl index 994ea8712..744f78470 100644 --- a/test/test_jet.jl +++ b/test/test_jet.jl @@ -36,5 +36,5 @@ end ) @show rep @test_broken length(JET.get_reports(rep)) == 0 - @test length(JET.get_reports(rep)) <= 7 + @test length(JET.get_reports(rep)) <= 9 end From 2cd865e0f96c6c71362beb4180646d568a231784 Mon Sep 17 00:00:00 2001 From: Shayan Pardis <46478622+Shayan-P@users.noreply.github.com> Date: Thu, 31 Aug 2023 12:29:28 -0400 Subject: [PATCH 2/5] Add noise gate GPU support (#171) --------- Co-authored-by: Stefan Krastanov --- .../QuantumCliffordGPUExt.jl | 3 +- ext/QuantumCliffordGPUExt/apply_noise.jl | 40 +++++++++++++++++++ test/test_gpu.jl | 28 ++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 ext/QuantumCliffordGPUExt/apply_noise.jl diff --git a/ext/QuantumCliffordGPUExt/QuantumCliffordGPUExt.jl b/ext/QuantumCliffordGPUExt/QuantumCliffordGPUExt.jl index c79571c8c..fd564ed2c 100644 --- a/ext/QuantumCliffordGPUExt/QuantumCliffordGPUExt.jl +++ b/ext/QuantumCliffordGPUExt/QuantumCliffordGPUExt.jl @@ -2,7 +2,7 @@ module QuantumCliffordGPUExt using CUDA using QuantumClifford -import QuantumClifford: to_cpu, to_gpu, _apply!, apply!, pftrajectories, fastcolumn, fastrow +import QuantumClifford: to_cpu, to_gpu, _apply!, apply!, pftrajectories, fastcolumn, fastrow, applynoise! include("utils.jl") include("types.jl") @@ -10,6 +10,7 @@ include("adapters.jl") include("apply.jl") include("pauli_frames.jl") include("fastmemlayout.jl") +include("apply_noise.jl") # todo hide _apply function later. only for internal use end diff --git a/ext/QuantumCliffordGPUExt/apply_noise.jl b/ext/QuantumCliffordGPUExt/apply_noise.jl new file mode 100644 index 000000000..57b4dfebf --- /dev/null +++ b/ext/QuantumCliffordGPUExt/apply_noise.jl @@ -0,0 +1,40 @@ +using QuantumClifford: _div, _mod + +function applynoise!(frame::PauliFrameGPU{T},noise::UnbiasedUncorrelatedNoise,i::Int) where {T <: Unsigned} + p = noise.errprobthird + lowbit = T(1) + ibig = _div(T,i-1)+1 + ismall = _mod(T,i-1) + ismallm = lowbit<<(ismall) + + stab = frame.frame + xzs = tab(stab).xzs + rows::Unsigned = size(stab, 1) + + @run_cuda applynoise_kernel(xzs, p, ibig, ismallm, rows) rows + return frame +end + + +function applynoise_kernel(xzs::DeviceMatrix{Tme}, + p::Real, + ibig::Int, + ismallm::Tme, + rows::Unsigned) where {Tme <: Unsigned} + + f = (blockIdx().x - 1) * blockDim().x + threadIdx().x; + if f > rows + return nothing + end + + r = rand() + if r < p # X error + xzs[ibig,f] ⊻= ismallm + elseif r < 2p # Z error + xzs[end÷2+ibig,f] ⊻= ismallm + elseif r < 3p # Y error + xzs[ibig,f] ⊻= ismallm + xzs[end÷2+ibig,f] ⊻= ismallm + end + return nothing +end diff --git a/test/test_gpu.jl b/test/test_gpu.jl index 6740c6404..a82f3bea3 100644 --- a/test/test_gpu.jl +++ b/test/test_gpu.jl @@ -34,8 +34,13 @@ function apply_single_or_double_qubit_and_compare(n, s, s_gpu) end end +function approx(a, b, threshold) + maximum(abs.(a - b)) <= threshold +end + + @testset "GPU" begin - CUDA.allowscalar(false) # todo how to add this to all tests. + CUDA.allowscalar(false) # make sure we are using GPU kernels and not iterating on indices @test begin p = random_pauli(3) @@ -118,4 +123,25 @@ end end true end + + @test begin + # test applynoise + N = 4 + trajectories = 10000 + f = PauliFrame(trajectories, N, N); + f = to_gpu(f) + p = 0.1 + measurements = pftrajectories(f, [ + sMZ(1, 1), + sHadamard(2), + sMZ(2, 2), + NoiseOp(UnbiasedUncorrelatedNoise(p), [3, 4]), + sMZ(3, 3), + sHadamard(4), + sMZ(4, 4) + ]).measurements + avg_result = to_cpu(sum(measurements, dims=1) / trajectories) + error_threshold = 0.02 + approx(vec(avg_result), [0, .5, 2p, .5], error_threshold) + end end From 5d7f2bf4e48caeee6698348d8b5f24877757d0f9 Mon Sep 17 00:00:00 2001 From: Stefan Krastanov Date: Thu, 31 Aug 2023 13:47:37 -0400 Subject: [PATCH 3/5] docfix due to a breakingchange in naive_syndrome_circuit (#183) --- docs/src/ecc_example_sim.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/ecc_example_sim.md b/docs/src/ecc_example_sim.md index 5feef8edf..e20f66703 100644 --- a/docs/src/ecc_example_sim.md +++ b/docs/src/ecc_example_sim.md @@ -26,7 +26,7 @@ ecirc = encoding_circuit(code) ... and the corresponding syndrome measurement circuit (the non-fault tolerant one) ```@example 1 -scirc = naive_syndrome_circuit(code) +scirc, _ = naive_syndrome_circuit(code) ``` The most straightforward way to start sampling syndromes is to set up a table of Pauli frames. From 4e767d2163fb21d63090988ff713917a9570dfbc Mon Sep 17 00:00:00 2001 From: Stefan Krastanov Date: Thu, 31 Aug 2023 14:46:31 -0400 Subject: [PATCH 4/5] bump version number --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 47542bfa0..ae5bc7dbe 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "QuantumClifford" uuid = "0525e862-1e90-11e9-3e4d-1b39d7109de1" authors = ["Stefan Krastanov "] -version = "0.8.15" +version = "0.8.16" [deps] Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" From 5c9822e18ddaa121aae3f818383a40ae553e9fba Mon Sep 17 00:00:00 2001 From: Shayan Pardis <46478622+Shayan-P@users.noreply.github.com> Date: Fri, 8 Sep 2023 13:22:13 -0400 Subject: [PATCH 5/5] Optimize inner gpu type (#172) --------- Co-authored-by: Stefan Krastanov --- benchmark/benchmark_gpu_innertype.jl | 81 ++++++++++++++++++++++++ ext/QuantumCliffordGPUExt/adapters.jl | 43 +++++++------ ext/QuantumCliffordGPUExt/apply_noise.jl | 1 + 3 files changed, 107 insertions(+), 18 deletions(-) create mode 100644 benchmark/benchmark_gpu_innertype.jl diff --git a/benchmark/benchmark_gpu_innertype.jl b/benchmark/benchmark_gpu_innertype.jl new file mode 100644 index 000000000..b1e639a2c --- /dev/null +++ b/benchmark/benchmark_gpu_innertype.jl @@ -0,0 +1,81 @@ +using Revise +using QuantumClifford +using QuantumClifford: to_cpu, to_gpu +using CUDA +using BenchmarkTools +using Plots +using ProgressBars +using DataFrames +using CSV + +include("surface-code-circuit.jl") + +ints = [UInt8, UInt16, UInt32, UInt64, UInt128] +names = [:UInt8, :UInt16, :UInt32, :UInt64, :UInt128] + +function get_stats() + println("Starting tests with $(Threads.nthreads()) threads out of `Sys.CPU_THREADS = $(Sys.CPU_THREADS)`...") + + # Powers of 2 to benchmark + powers = reverse(3:18) # start from the hardest one first! (fail fast) + n_values = 2 .^ powers + + times = Dict([ + sm=>[] + for (sm, tp) in zip(names, ints) + ]) + + ccircuit = if eltype(circ) <: QuantumClifford.CompactifiedGate + circ + else + compactify_circuit(circ) + end + + CUDA.allowscalar(false) + + + for n in ProgressBar(n_values) + for (sm, T) in zip(names, ints) + gpu_frames() = to_gpu(QuantumClifford._create_pauliframe(ccircuit; trajectories=n), T) + gpu_column() = pftrajectories(fastcolumn(gpu_frames()), ccircuit), synchronize() + @show n, T + @elapsed gpu_column() # to eliminate compile time + push!(times[sm], @belapsed $gpu_column()) + end + # note. this is also measuring the time it takes to move data to gpu + end + df = DataFrame(times) + df.n_values=n_values + return df +end + +function load_stats() + if isfile("gpu_innertype_comparison.csv") + DataFrame(CSV.File("gpu_innertype_comparison.csv")) + else + get_stats() + end +end + +function save_stats(df) + CSV.write("gpu_innertype_comparison.csv", df) +end + +function plot_result(df) + my_y_axis(arrs) = begin + arr = [] + for a in arrs + append!(arr, a) + end + logarr = log.(arr) + exp.(range(min(logarr...), stop=max(logarr...), length=8)) + end + plot(df.n_values, [df[!, col] for col in names], marker=:circle, label=reshape(["times $col" for col in names], (1, length(names))), xlabel="n", ylabel="Execution Time (s)", title="gpu inner type comparison (fast column)", xticks=df.n_values[1:2:end], xscale=:log2, yticks=my_y_axis([df[!, col] for col in names]), yscale=:log10, legend=:topleft, size=(1200, 800)) +end + +function main() + df = load_stats() + save_stats(df) + plot_result(df) + savefig("benchmark_gpu_innertype.png") +end diff --git a/ext/QuantumCliffordGPUExt/adapters.jl b/ext/QuantumCliffordGPUExt/adapters.jl index 5b009728a..86d91685a 100644 --- a/ext/QuantumCliffordGPUExt/adapters.jl +++ b/ext/QuantumCliffordGPUExt/adapters.jl @@ -1,33 +1,40 @@ -# const InnerGPUType = UInt64; +const InnerGPUType = UInt32; +const InnerCPUType = UInt64; # todo. make the conversion types consiting with this... # also define a cpu default type?! -to_gpu(array::AbstractArray) = CuArray(array); +to_gpu(array::AbstractArray) = CuArray(array) to_cpu(array::AbstractArray) = Array(array); +# changes the type to InnerGPUType or InnerCPUType as well as copying to cpu/gpu +to_gpu(array::AbstractArray, tp::Type{T}) where {T <: Unsigned} = + CuArray(reinterpret(T, collect(array))) +to_cpu(array::AbstractArray, tp::Type{T}) where {T <: Unsigned} = + Array(reinterpret(T, collect(array))) + # maybe change the format of storing the data in gpu array # so that it is more convinient to work with them on gpu? # todo later add some type checking to avoid copying (or throw error) if the data is already on gpu/cpu -to_gpu(tab::QuantumClifford.Tableau) = - QuantumClifford.Tableau(to_gpu(tab.phases), tab.nqubits, to_gpu(tab.xzs)) +to_gpu(tab::QuantumClifford.Tableau, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.Tableau(to_gpu(tab.phases), tab.nqubits, to_gpu(tab.xzs, tp)) -to_cpu(tab::QuantumClifford.Tableau) = - QuantumClifford.Tableau(to_cpu(tab.phases), tab.nqubits, to_cpu(tab.xzs)) +to_cpu(tab::QuantumClifford.Tableau, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.Tableau(to_cpu(tab.phases), tab.nqubits, to_cpu(tab.xzs, tp)) -to_gpu(pauli::QuantumClifford.PauliOperator) = - QuantumClifford.PauliOperator(to_gpu(pauli.phase), pauli.nqubits, to_gpu(pauli.xz)) +to_gpu(pauli::QuantumClifford.PauliOperator, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.PauliOperator(to_gpu(pauli.phase), pauli.nqubits, to_gpu(pauli.xz, tp)) -to_cpu(pauli::QuantumClifford.PauliOperator) = - QuantumClifford.PauliOperator(to_cpu(pauli.phase), pauli.nqubits, to_cpu(pauli.xz)) +to_cpu(pauli::QuantumClifford.PauliOperator, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.PauliOperator(to_cpu(pauli.phase), pauli.nqubits, to_cpu(pauli.xz, tp)) -to_gpu(stabilizer::QuantumClifford.Stabilizer) = - Stabilizer(to_gpu(tab(stabilizer))) +to_gpu(stabilizer::QuantumClifford.Stabilizer, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + Stabilizer(to_gpu(tab(stabilizer), tp)) -to_cpu(stabilizer::QuantumClifford.Stabilizer) = - Stabilizer(to_cpu(tab(stabilizer))) +to_cpu(stabilizer::QuantumClifford.Stabilizer, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + Stabilizer(to_cpu(tab(stabilizer), tp)) -to_gpu(pauli_frame::QuantumClifford.PauliFrame) = - QuantumClifford.PauliFrame(to_gpu(pauli_frame.frame), to_gpu(pauli_frame.measurements)) +to_gpu(pauli_frame::QuantumClifford.PauliFrame, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.PauliFrame(to_gpu(pauli_frame.frame, tp), to_gpu(pauli_frame.measurements)) -to_cpu(pauli_frame::QuantumClifford.PauliFrame) = - QuantumClifford.PauliFrame(to_cpu(pauli_frame.frame), to_cpu(pauli_frame.measurements)) +to_cpu(pauli_frame::QuantumClifford.PauliFrame, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.PauliFrame(to_cpu(pauli_frame.frame, tp), to_cpu(pauli_frame.measurements)) diff --git a/ext/QuantumCliffordGPUExt/apply_noise.jl b/ext/QuantumCliffordGPUExt/apply_noise.jl index 57b4dfebf..ecfcd647e 100644 --- a/ext/QuantumCliffordGPUExt/apply_noise.jl +++ b/ext/QuantumCliffordGPUExt/apply_noise.jl @@ -27,6 +27,7 @@ function applynoise_kernel(xzs::DeviceMatrix{Tme}, return nothing end + r = rand() if r < p # X error xzs[ibig,f] ⊻= ismallm