From c390beca2cc54a102e4ce5353ab6857b17107cb1 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 4 Mar 2024 19:06:37 +0100 Subject: [PATCH 01/27] Add truncated SVD adjoint with wrapper for KrylovKit iterative SVD, add small test script --- Project.toml | 1 + examples/test_svd_adjoint.jl | 99 ++++++++++++++++++++++++ src/PEPSKit.jl | 4 +- src/utility/svd.jl | 143 +++++++++++++++++++++++++++++++++++ 4 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 examples/test_svd_adjoint.jl create mode 100644 src/utility/svd.jl diff --git a/Project.toml b/Project.toml index 8028adf6..d5e3a930 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" TensorKit = "07d1fe3e-3e46-537d-9eac-e9e13d0d4cec" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] MPSKit = "0.10" diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl new file mode 100644 index 00000000..e11f0eff --- /dev/null +++ b/examples/test_svd_adjoint.jl @@ -0,0 +1,99 @@ +using LinearAlgebra +using TensorKit +using ChainRulesCore, Zygote +using PEPSKit + +# Non-proper truncated SVD with outdated adjoint +oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...) + +# Outdated adjoint not taking truncated part into account +function ChainRulesCore.rrule( + ::typeof(oldsvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs... +) + U, S, V = oldsvd(t, χ; kwargs...) + + function oldsvd_pullback((ΔU, ΔS, ΔV)) + ∂t = similar(t) + for (c, b) in blocks(∂t) + copyto!( + b, + oldsvd_rev( + block(U, c), + block(S, c), + block(V, c), + block(ΔU, c), + block(ΔS, c), + block(ΔV, c); + εbroad, + ), + ) + end + return NoTangent(), ∂t, NoTangent() + end + + return (U, S, V), oldsvd_pullback +end + +function oldsvd_rev( + U::AbstractMatrix, + S::AbstractMatrix, + V::AbstractMatrix, + ΔU, + ΔS, + ΔV; + εbroad=0, + atol::Real=0, + rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), +) + S = diagm(S) + V = copy(V') + + tol = atol > 0 ? atol : rtol * S[1, 1] + F = PEPSKit.invert_S²(S, tol; εbroad) # Includes Lorentzian broadening + S⁻¹ = pinv(S; atol=tol) + + # dS contribution + term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS)) + + # dU₁ and dV₁ off-diagonal contribution + J = F .* (U' * ΔU) + term += (J + J') * S + VΔV = (V * ΔV') + K = F .* VΔV + term += S * (K + K') + + # dV₁ diagonal contribution (diagonal of dU₁ is gauged away) + if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent) + L = Diagonal(diag(VΔV)) + term += 0.5 * S⁻¹ * (L' - L) + end + ΔA = U * term * V + + # Projector contribution for non-square A + UUd = U * U' + VdV = V' * V + Uproj = one(UUd) - UUd + Vproj = one(VdV) - VdV + ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Old wrong stuff + + return ΔA +end + +# Loss function taking the nfirst first singular vectors into account +function nfirst_loss(A, svdfunc; nfirst=1) + U, _, V = svdfunc(A) + U = convert(Array, U) + V = convert(Array, V) + return real(sum([U[i, i] * V[i, i] for i in 1:nfirst])) +end + +m, n = 30, 20 +dtype = ComplexF64 +χ = 15 +r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) + +ltensorkit, gtensorkit = withgradient(A -> nfirst_loss(A, x -> oldsvd(x, χ); nfirst=3), r) +litersvd, gitersvd = withgradient(A -> nfirst_loss(A, x -> itersvd(x, χ); nfirst=3), r) + +@show ltensorkit ≈ litersvd +@show gtensorkit ≈ gitersvd \ No newline at end of file diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index b2dcebbe..456482e0 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -6,12 +6,13 @@ using TensorKit, KrylovKit, MPSKit, OptimKit, Base.Threads, Base.Iterators, Parameters, Printf using ChainRulesCore -using LinearAlgebra: LinearAlgebra +using LinearAlgebra export CTMRG, CTMRG2 export leading_boundary include("utility/util.jl") +include("utility/svd.jl") include("states/abstractpeps.jl") include("states/infinitepeps.jl") @@ -46,5 +47,6 @@ export InfinitePEPO, InfiniteTransferPEPO export initializeMPS, initializePEPS export PEPOOptimize, pepo_opt_environments export symmetrize, None, Depth, Full +export itersvd end # module diff --git a/src/utility/svd.jl b/src/utility/svd.jl new file mode 100644 index 00000000..d3b149d0 --- /dev/null +++ b/src/utility/svd.jl @@ -0,0 +1,143 @@ +# Computation of F in SVD adjoint, including Lorentzian broadening +function invert_S²(S::AbstractMatrix{T}, tol::Real; εbroad=0) where {T<:Real} + F = similar(S) + @inbounds for i in axes(F, 1), j in axes(F, 2) + F[i, j] = if i == j + zero(T) + else + sᵢ, sⱼ = S[i, i], S[j, j] + Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2 + εbroad > 0 && (Δs = lorentz_broaden(Δs, εbroad)) + 1 / Δs + end + end + return F +end + +# Lorentzian broadening for SVD adjoint singularities +function lorentz_broaden(x::Real, ε=1e-12) + x′ = 1 / x + return x′ / (x′^2 + ε) +end + +# Proper truncated SVD using iterative solver +function itersvd( + t::AbstractTensorMap, + χ::Int; + εbroad=0, + solverkwargs=(; krylovdim=χ + 5, tol=1e2eps(real(scalartype(t)))), +) + vals, lvecs, rvecs, info = svdsolve(t.data, dim(codomain(t)), χ; solverkwargs...) + truncspace = field(t)^χ + if info.converged < χ # Fall back to dense SVD + @warn "falling back to dense SVD solver since length(S) < χ" + return tsvd(t; trunc=truncdim(χ), alg=TensorKit.SVD()) + else + vals = @view(vals[1:χ]) + lvecs = @view(lvecs[1:χ]) + rvecs = @view(rvecs[1:χ]) + end + U = TensorMap(hcat(lvecs...), codomain(t) ← truncspace) + S = TensorMap(diagm(vals), truncspace ← truncspace) + V = TensorMap(copy(hcat(rvecs...)'), truncspace ← domain(t)) + return U, S, V +end + +# Reverse rule adopted from tsvd! rrule as found in TensorKit.jl +function ChainRulesCore.rrule( + ::typeof(itersvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs... +) + U, S, V = itersvd(t, χ; kwargs...) + + function itersvd_pullback((ΔU, ΔS, ΔV)) + ∂t = similar(t) + for (c, b) in blocks(∂t) + copyto!( + b, + itersvd_rev( + block(t, c), + block(U, c), + block(S, c), + block(V, c), + block(ΔU, c), + block(ΔS, c), + block(ΔV, c); + εbroad, + ), + ) + end + return NoTangent(), ∂t, NoTangent() + end + + return (U, S, V), itersvd_pullback +end + +# SVD adjoint with proper truncation +function itersvd_rev( + A::AbstractMatrix, + U::AbstractMatrix, + S::AbstractMatrix, + V::AbstractMatrix, + ΔU, + ΔS, + ΔV; + εbroad=0, + atol::Real=0, + rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), +) + Ad = copy(A') + tol = atol > 0 ? atol : rtol * S[1, 1] + F = invert_S²(S, tol; εbroad) # Includes Lorentzian broadening + S⁻¹ = pinv(S; atol=tol) + + # dS contribution + term = ΔS isa ZeroTangent ? ΔS : Diagonal(real.(ΔS)) # Implicitly performs 𝕀 ∘ dS + + # dU₁ and dV₁ off-diagonal contribution + J = F .* (U' * ΔU) + term += (J + J') * S + VΔV = (V * ΔV') + K = F .* VΔV + term += S * (K + K') + + # dV₁ diagonal contribution (diagonal of dU₁ is gauged away) + if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent) + L = Diagonal(VΔV) # Implicitly performs 𝕀 ∘ dV + term += 0.5 * S⁻¹ * (L' - L) + end + ΔA = U * term * V + + # Projector contribution for non-square A and dU₂ and dV₂ + UUd = U * U' + VdV = V' * V + Uproj = one(UUd) - UUd + Vproj = one(VdV) - VdV + m, k, n = size(U, 1), size(U, 2), size(V, 2) + dimγ = k * m # Vectorized dimension of γ-matrix + + # Truncation contribution from dU₂ and dV₂ + # TODO: Use KrylovKit instead of IterativeSolvers + Sop = LinearMap(k * m + k * n) do v # Left-preconditioned linear problem + γ = reshape(@view(v[1:dimγ]), (k, m)) + γd = reshape(@view(v[(dimγ + 1):end]), (k, n)) + Γ1 = γ - S⁻¹ * γd * Vproj * Ad + Γ2 = γd - S⁻¹ * γ * Uproj * A + vcat(reshape(Γ1, :), reshape(Γ2, :)) + end + if ΔU isa ZeroTangent && ΔV isa ZeroTangent + γ = gmres(Sop, zeros(eltype(A), k * m + k * n)) + else + # Explicit left-preconditioning + # Set relative tolerance to machine precision to converge SVD gradient error properly + γ = gmres( + Sop, + vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :)); + reltol=eps(real(eltype(A))), + ) + end + γA = reshape(@view(γ[1:dimγ]), k, m) + γAd = reshape(@view(γ[(dimγ + 1):end]), k, n) + ΔA += Uproj * γA' * V + U * γAd * Vproj + + return ΔA +end From 51507ce14eae2fff8cfaa94089e58eb009e28b26 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 5 Mar 2024 11:16:41 +0100 Subject: [PATCH 02/27] Use KrylovKit.linsolve for truncation linear problem, make loss function differentiable --- Project.toml | 1 + examples/test_svd_adjoint.jl | 34 +++++++++++++++++++--------------- src/utility/svd.jl | 28 ++++++++++++---------------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/Project.toml b/Project.toml index d5e3a930..2826518e 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.1.0" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPSKit = "bb1c41ca-d63c-52ed-829e-0820dda26502" diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl index e11f0eff..fcd50f47 100644 --- a/examples/test_svd_adjoint.jl +++ b/examples/test_svd_adjoint.jl @@ -1,6 +1,6 @@ using LinearAlgebra using TensorKit -using ChainRulesCore, Zygote +using ChainRulesCore, ChainRulesTestUtils, Zygote using PEPSKit # Non-proper truncated SVD with outdated adjoint @@ -45,9 +45,6 @@ function oldsvd_rev( atol::Real=0, rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), ) - S = diagm(S) - V = copy(V') - tol = atol > 0 ? atol : rtol * S[1, 1] F = PEPSKit.invert_S²(S, tol; εbroad) # Includes Lorentzian broadening S⁻¹ = pinv(S; atol=tol) @@ -74,26 +71,33 @@ function oldsvd_rev( VdV = V' * V Uproj = one(UUd) - UUd Vproj = one(VdV) - VdV - ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Old wrong stuff + ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Wrong truncation contribution return ΔA end -# Loss function taking the nfirst first singular vectors into account -function nfirst_loss(A, svdfunc; nfirst=1) +# Gauge-invariant loss function +function lossfun(A, svdfunc) U, _, V = svdfunc(A) - U = convert(Array, U) - V = convert(Array, V) - return real(sum([U[i, i] * V[i, i] for i in 1:nfirst])) + # return real(sum((U * V).data)) # TODO: code up sum for AbstractTensorMap with rrule + return real(tr(U * V)) # trace only allows for m=n end -m, n = 30, 20 +m, n = 30, 30 dtype = ComplexF64 -χ = 15 +χ = 20 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) -ltensorkit, gtensorkit = withgradient(A -> nfirst_loss(A, x -> oldsvd(x, χ); nfirst=3), r) -litersvd, gitersvd = withgradient(A -> nfirst_loss(A, x -> itersvd(x, χ); nfirst=3), r) +println("Non-truncated SVD") +ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, min(m, n))), r) +litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, min(m, n))), r) +@show ltensorkit ≈ litersvd +@show norm(gtensorkit[1] - gitersvd[1]) +println("\nTruncated SVD to χ=$χ:") +ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, χ)), r) +litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, χ)), r) @show ltensorkit ≈ litersvd -@show gtensorkit ≈ gitersvd \ No newline at end of file +@show norm(gtensorkit[1] - gitersvd[1]) + +# TODO: Finite-difference check via test_rrule diff --git a/src/utility/svd.jl b/src/utility/svd.jl index d3b149d0..ee3cbd4b 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -116,28 +116,24 @@ function itersvd_rev( dimγ = k * m # Vectorized dimension of γ-matrix # Truncation contribution from dU₂ and dV₂ - # TODO: Use KrylovKit instead of IterativeSolvers - Sop = LinearMap(k * m + k * n) do v # Left-preconditioned linear problem - γ = reshape(@view(v[1:dimγ]), (k, m)) - γd = reshape(@view(v[(dimγ + 1):end]), (k, n)) - Γ1 = γ - S⁻¹ * γd * Vproj * Ad - Γ2 = γd - S⁻¹ * γ * Uproj * A - vcat(reshape(Γ1, :), reshape(Γ2, :)) + function svdlinprob(v) # Left-preconditioned linear problem + γ1 = reshape(@view(v[1:dimγ]), (k, m)) + γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n)) + Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad + Γ2 = γ2 - S⁻¹ * γ1 * Uproj * A + return vcat(reshape(Γ1, :), reshape(Γ2, :)) end if ΔU isa ZeroTangent && ΔV isa ZeroTangent - γ = gmres(Sop, zeros(eltype(A), k * m + k * n)) + γ = linsolve(Sop, zeros(eltype(A), k * m + k * n)) else # Explicit left-preconditioning # Set relative tolerance to machine precision to converge SVD gradient error properly - γ = gmres( - Sop, - vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :)); - reltol=eps(real(eltype(A))), - ) + y = vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :)) + γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) end - γA = reshape(@view(γ[1:dimγ]), k, m) - γAd = reshape(@view(γ[(dimγ + 1):end]), k, n) - ΔA += Uproj * γA' * V + U * γAd * Vproj + γA1 = reshape(@view(γ[1:dimγ]), k, m) + γA2 = reshape(@view(γ[(dimγ + 1):end]), k, n) + ΔA += Uproj * γA1' * V + U * γA2 * Vproj return ΔA end From d3a31fb55d1661d8cd0c01778f53fd64714b933d Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 5 Mar 2024 16:09:22 +0100 Subject: [PATCH 03/27] Improve loss function, compare SVD gradient with TensorKit.tsvd gradient --- examples/test_svd_adjoint.jl | 38 +++++++++++++++++++++--------------- src/utility/svd.jl | 1 + 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl index fcd50f47..69124870 100644 --- a/examples/test_svd_adjoint.jl +++ b/examples/test_svd_adjoint.jl @@ -3,7 +3,7 @@ using TensorKit using ChainRulesCore, ChainRulesTestUtils, Zygote using PEPSKit -# Non-proper truncated SVD with outdated adjoint +# Truncated SVD with outdated adjoint oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...) # Outdated adjoint not taking truncated part into account @@ -77,27 +77,33 @@ function oldsvd_rev( end # Gauge-invariant loss function -function lossfun(A, svdfunc) +function lossfun(A, R=TensorMap(randn, space(A)), svdfunc=tsvd) U, _, V = svdfunc(A) - # return real(sum((U * V).data)) # TODO: code up sum for AbstractTensorMap with rrule - return real(tr(U * V)) # trace only allows for m=n + return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n end -m, n = 30, 30 +m, n = 20, 30 dtype = ComplexF64 -χ = 20 +χ = 15 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) +R = TensorMap(randn, space(r)) -println("Non-truncated SVD") -ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, min(m, n))), r) -litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, min(m, n))), r) -@show ltensorkit ≈ litersvd +println("Non-truncated SVD:") +loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, min(m, n))), r) +ltensorkit, gtensorkit = withgradient( + A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(min(m, n)))), r +) +litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, min(m, n))), r) +@show loldsvd ≈ ltensorkit ≈ litersvd +@show norm(gtensorkit[1] - goldsvd[1]) @show norm(gtensorkit[1] - gitersvd[1]) -println("\nTruncated SVD to χ=$χ:") -ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, χ)), r) -litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, χ)), r) -@show ltensorkit ≈ litersvd +println("\nTruncated SVD with χ=$χ:") +loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, χ)), r) +ltensorkit, gtensorkit = withgradient( + A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(χ))), r +) +litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, χ)), r) +@show loldsvd ≈ ltensorkit ≈ litersvd +@show norm(gtensorkit[1] - goldsvd[1]) @show norm(gtensorkit[1] - gitersvd[1]) - -# TODO: Finite-difference check via test_rrule diff --git a/src/utility/svd.jl b/src/utility/svd.jl index ee3cbd4b..975c72cb 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -117,6 +117,7 @@ function itersvd_rev( # Truncation contribution from dU₂ and dV₂ function svdlinprob(v) # Left-preconditioned linear problem + # TODO: make v a Tuple instead of concatening two vectors γ1 = reshape(@view(v[1:dimγ]), (k, m)) γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n)) Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad From 8ac307e490b6fd7c68793f12670b478e2277e999 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 26 Mar 2024 14:46:50 +0100 Subject: [PATCH 04/27] Update SVD adjoint linear problem to use Tuple and remove reshapes --- src/utility/svd.jl | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 975c72cb..ccca32d4 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -112,29 +112,21 @@ function itersvd_rev( VdV = V' * V Uproj = one(UUd) - UUd Vproj = one(VdV) - VdV - m, k, n = size(U, 1), size(U, 2), size(V, 2) - dimγ = k * m # Vectorized dimension of γ-matrix # Truncation contribution from dU₂ and dV₂ function svdlinprob(v) # Left-preconditioned linear problem - # TODO: make v a Tuple instead of concatening two vectors - γ1 = reshape(@view(v[1:dimγ]), (k, m)) - γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n)) - Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad - Γ2 = γ2 - S⁻¹ * γ1 * Uproj * A - return vcat(reshape(Γ1, :), reshape(Γ2, :)) + Γ1 = v[1] - S⁻¹ * v[2] * Vproj * Ad + Γ2 = v[2] - S⁻¹ * v[1] * Uproj * A + return (Γ1, Γ2) end if ΔU isa ZeroTangent && ΔV isa ZeroTangent - γ = linsolve(Sop, zeros(eltype(A), k * m + k * n)) + m, k, n = size(U, 1), size(U, 2), size(V, 2) + γ = linsolve(Sop, (zeros(eltype(A), k * m), zeros(eltype(A), k * n))) else - # Explicit left-preconditioning - # Set relative tolerance to machine precision to converge SVD gradient error properly - y = vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :)) + y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj) γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) end - γA1 = reshape(@view(γ[1:dimγ]), k, m) - γA2 = reshape(@view(γ[(dimγ + 1):end]), k, n) - ΔA += Uproj * γA1' * V + U * γA2 * Vproj + ΔA += Uproj * γ[1]' * V + U * γ[2] * Vproj return ΔA end From 80db1c05d5d388dd73d48e1cc888030f4684e2ce Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Thu, 11 Apr 2024 12:23:23 +0200 Subject: [PATCH 05/27] Fix ZeroTangent case for linear problem --- src/utility/svd.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index ccca32d4..af9baa2a 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -121,7 +121,8 @@ function itersvd_rev( end if ΔU isa ZeroTangent && ΔV isa ZeroTangent m, k, n = size(U, 1), size(U, 2), size(V, 2) - γ = linsolve(Sop, (zeros(eltype(A), k * m), zeros(eltype(A), k * n))) + y = (zeros(eltype(A), k * m), zeros(eltype(A), k * n)) + γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) else y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj) γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) From ae96298e0b1201b2d39d8d3dc6129d99449fc9d9 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Thu, 20 Jun 2024 17:16:31 +0200 Subject: [PATCH 06/27] Add SVD wrapper structs and function, utilize tsvd machinery, convert SVD adjoint script to test --- examples/test_svd_adjoint.jl | 109 ------------------ src/PEPSKit.jl | 1 + src/utility/svd.jl | 210 +++++++++++++++++++++++------------ test/svdwrap.jl | 63 +++++++++++ 4 files changed, 206 insertions(+), 177 deletions(-) delete mode 100644 examples/test_svd_adjoint.jl create mode 100644 test/svdwrap.jl diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl deleted file mode 100644 index 69124870..00000000 --- a/examples/test_svd_adjoint.jl +++ /dev/null @@ -1,109 +0,0 @@ -using LinearAlgebra -using TensorKit -using ChainRulesCore, ChainRulesTestUtils, Zygote -using PEPSKit - -# Truncated SVD with outdated adjoint -oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...) - -# Outdated adjoint not taking truncated part into account -function ChainRulesCore.rrule( - ::typeof(oldsvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs... -) - U, S, V = oldsvd(t, χ; kwargs...) - - function oldsvd_pullback((ΔU, ΔS, ΔV)) - ∂t = similar(t) - for (c, b) in blocks(∂t) - copyto!( - b, - oldsvd_rev( - block(U, c), - block(S, c), - block(V, c), - block(ΔU, c), - block(ΔS, c), - block(ΔV, c); - εbroad, - ), - ) - end - return NoTangent(), ∂t, NoTangent() - end - - return (U, S, V), oldsvd_pullback -end - -function oldsvd_rev( - U::AbstractMatrix, - S::AbstractMatrix, - V::AbstractMatrix, - ΔU, - ΔS, - ΔV; - εbroad=0, - atol::Real=0, - rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), -) - tol = atol > 0 ? atol : rtol * S[1, 1] - F = PEPSKit.invert_S²(S, tol; εbroad) # Includes Lorentzian broadening - S⁻¹ = pinv(S; atol=tol) - - # dS contribution - term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS)) - - # dU₁ and dV₁ off-diagonal contribution - J = F .* (U' * ΔU) - term += (J + J') * S - VΔV = (V * ΔV') - K = F .* VΔV - term += S * (K + K') - - # dV₁ diagonal contribution (diagonal of dU₁ is gauged away) - if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent) - L = Diagonal(diag(VΔV)) - term += 0.5 * S⁻¹ * (L' - L) - end - ΔA = U * term * V - - # Projector contribution for non-square A - UUd = U * U' - VdV = V' * V - Uproj = one(UUd) - UUd - Vproj = one(VdV) - VdV - ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Wrong truncation contribution - - return ΔA -end - -# Gauge-invariant loss function -function lossfun(A, R=TensorMap(randn, space(A)), svdfunc=tsvd) - U, _, V = svdfunc(A) - return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n -end - -m, n = 20, 30 -dtype = ComplexF64 -χ = 15 -r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) -R = TensorMap(randn, space(r)) - -println("Non-truncated SVD:") -loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, min(m, n))), r) -ltensorkit, gtensorkit = withgradient( - A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(min(m, n)))), r -) -litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, min(m, n))), r) -@show loldsvd ≈ ltensorkit ≈ litersvd -@show norm(gtensorkit[1] - goldsvd[1]) -@show norm(gtensorkit[1] - gitersvd[1]) - -println("\nTruncated SVD with χ=$χ:") -loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, χ)), r) -ltensorkit, gtensorkit = withgradient( - A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(χ))), r -) -litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, χ)), r) -@show loldsvd ≈ ltensorkit ≈ litersvd -@show norm(gtensorkit[1] - goldsvd[1]) -@show norm(gtensorkit[1] - gitersvd[1]) diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 456482e0..f195b882 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -42,6 +42,7 @@ module Defaults const tol = 1e-12 end +export FullSVD, IterSVD, OldSVD, svdwrap export InfinitePEPS, InfiniteTransferPEPS export InfinitePEPO, InfiniteTransferPEPO export initializeMPS, initializePEPS diff --git a/src/utility/svd.jl b/src/utility/svd.jl index af9baa2a..d06b27c8 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -1,97 +1,164 @@ -# Computation of F in SVD adjoint, including Lorentzian broadening -function invert_S²(S::AbstractMatrix{T}, tol::Real; εbroad=0) where {T<:Real} - F = similar(S) - @inbounds for i in axes(F, 1), j in axes(F, 2) - F[i, j] = if i == j - zero(T) - else - sᵢ, sⱼ = S[i, i], S[j, j] - Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2 - εbroad > 0 && (Δs = lorentz_broaden(Δs, εbroad)) - 1 / Δs +import TensorKit: + SectorDict, + _empty_svdtensors, + _compute_svddata!, + _truncate!, + _implement_svdtruncation!, + _create_svdtensors + +# Plain copy of tsvd!(...) from TensorKit to lift alg type restriction +function _tensorkit_svd!( + t::TensorMap; + trunc::TruncationScheme=TensorKit.NoTruncation(), + p::Real=2, + alg=TensorKit.SVD(), +) + #early return + if isempty(blocksectors(t)) + truncerr = zero(real(scalartype(t))) + return _empty_svdtensors(t)..., truncerr + end + + S = spacetype(t) + Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg) + if !isa(trunc, TensorKit.NoTruncation) + Σdata, truncerr = _truncate!(Σdata, trunc, p) + Udata, Σdata, Vdata, dims = _implement_svdtruncation!(t, Udata, Σdata, Vdata, dims) + W = S(dims) + else + truncerr = abs(zero(scalartype(t))) + W = S(dims) + if length(domain(t)) == 1 && domain(t)[1] ≅ W + W = domain(t)[1] + elseif length(codomain(t)) == 1 && codomain(t)[1] ≅ W + W = codomain(t)[1] end end - return F + return _create_svdtensors(t, Udata, Σdata, Vdata, W)..., truncerr end -# Lorentzian broadening for SVD adjoint singularities -function lorentz_broaden(x::Real, ε=1e-12) - x′ = 1 / x - return x′ / (x′^2 + ε) +# Wrapper struct around TensorKit's SVD algorithms +@kwdef struct FullSVD + alg::Union{<:TensorKit.SVD,<:TensorKit.SDD} = TensorKit.SVD() + lorentz_broad::Float64 = 0.0 +end + +function svdwrap(t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...) + # TODO: Replace _tensorkit_svd! with just tsvd eventually to use the full TensorKit machinery + return _tensorkit_svd!(copy(t); trunc, alg.alg) end -# Proper truncated SVD using iterative solver -function itersvd( - t::AbstractTensorMap, - χ::Int; - εbroad=0, - solverkwargs=(; krylovdim=χ + 5, tol=1e2eps(real(scalartype(t)))), +function ChainRulesCore.rrule( + ::typeof(svdwrap), t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs... ) - vals, lvecs, rvecs, info = svdsolve(t.data, dim(codomain(t)), χ; solverkwargs...) - truncspace = field(t)^χ - if info.converged < χ # Fall back to dense SVD - @warn "falling back to dense SVD solver since length(S) < χ" - return tsvd(t; trunc=truncdim(χ), alg=TensorKit.SVD()) - else - vals = @view(vals[1:χ]) - lvecs = @view(lvecs[1:χ]) - rvecs = @view(rvecs[1:χ]) + tsvd_return, tsvd!_pullback = ChainRulesCore.rrule(tsvd!, t; alg=TensorKit.SVD(), trunc) + function svdwrap_fullsvd_pullback(Δ) + return tsvd!_pullback(Δ)..., NoTangent() + end + return tsvd_return, svdwrap_fullsvd_pullback +end + +# Wrapper around Krylov Kit's GKL iterative SVD solver +@kwdef struct IterSVD + alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25) + howmany::Int = 20 + lorentz_broad::Float64 = 0.0 + alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14) +end + +function svdwrap(t::AbstractTensorMap, alg::IterSVD; trunc=notrunc(), kwargs...) + U, S, V, = _tensorkit_svd!(copy(t); trunc, alg) # TODO: Also replace this with tsvd eventually + ϵ = norm(t - U * S * V) # Compute truncation error separately + return U, S, V, ϵ +end + +# Compute SVD data block-wise using KrylovKit algorithm +function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD) + InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) + I = sectortype(t) + A = storagetype(t) + Udata = SectorDict{I,A}() + Vdata = SectorDict{I,A}() + dims = SectorDict{I,Int}() + local Σdata + for (c, b) in blocks(t) + x₀ = randn(eltype(b), size(b, 1)) + Σ, lvecs, rvecs, info = svdsolve(b, x₀, alg.howmany, :LR, alg.alg) + if info.converged < alg.howmany # Fall back to dense SVD if not properly converged + U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + Udata[c] = U + Vdata[c] = V + else + Udata[c] = stack(lvecs) + Vdata[c] = stack(rvecs)' + end + if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction + Σdata[c] = Σ + else + Σdata = SectorDict(c => Σ) + end + dims[c] = length(Σ) end - U = TensorMap(hcat(lvecs...), codomain(t) ← truncspace) - S = TensorMap(diagm(vals), truncspace ← truncspace) - V = TensorMap(copy(hcat(rvecs...)'), truncspace ← domain(t)) - return U, S, V + return Udata, Σdata, Vdata, dims +end + +# TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint + +# Full SVD with old adjoint that doesn't account for truncation properly +@kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}} + alg::A = FullSVD() + lorentz_broad::Float64 = 0.0 +end + +function svdwrap(t::AbstractTensorMap, alg::OldSVD; kwargs...) + return svdwrap(t, alg.alg; kwargs...) end -# Reverse rule adopted from tsvd! rrule as found in TensorKit.jl +# Outdated adjoint not taking truncated part into account for testing purposes function ChainRulesCore.rrule( - ::typeof(itersvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs... + ::typeof(svdwrap), t::AbstractTensorMap, alg::OldSVD; kwargs... ) - U, S, V = itersvd(t, χ; kwargs...) + U, S, V, ϵ = svdwrap(t, alg; kwargs...) - function itersvd_pullback((ΔU, ΔS, ΔV)) + function svdwrap_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) ∂t = similar(t) for (c, b) in blocks(∂t) copyto!( b, - itersvd_rev( - block(t, c), + oldsvd_rev( block(U, c), block(S, c), block(V, c), block(ΔU, c), block(ΔS, c), block(ΔV, c); - εbroad, + lorentz_broad=alg.lorentz_broad, ), ) end return NoTangent(), ∂t, NoTangent() end - return (U, S, V), itersvd_pullback + return (U, S, V, ϵ), svdwrap_oldsvd_pullback end -# SVD adjoint with proper truncation -function itersvd_rev( - A::AbstractMatrix, +function oldsvd_rev( U::AbstractMatrix, S::AbstractMatrix, V::AbstractMatrix, ΔU, ΔS, ΔV; - εbroad=0, + lorentz_broad=0, atol::Real=0, rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), ) - Ad = copy(A') tol = atol > 0 ? atol : rtol * S[1, 1] - F = invert_S²(S, tol; εbroad) # Includes Lorentzian broadening + F = _invert_S²(S, tol, lorentz_broad) # Includes Lorentzian broadening S⁻¹ = pinv(S; atol=tol) # dS contribution - term = ΔS isa ZeroTangent ? ΔS : Diagonal(real.(ΔS)) # Implicitly performs 𝕀 ∘ dS + term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS)) # dU₁ and dV₁ off-diagonal contribution J = F .* (U' * ΔU) @@ -102,32 +169,39 @@ function itersvd_rev( # dV₁ diagonal contribution (diagonal of dU₁ is gauged away) if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent) - L = Diagonal(VΔV) # Implicitly performs 𝕀 ∘ dV + L = Diagonal(diag(VΔV)) term += 0.5 * S⁻¹ * (L' - L) end ΔA = U * term * V - # Projector contribution for non-square A and dU₂ and dV₂ + # Projector contribution for non-square A UUd = U * U' VdV = V' * V Uproj = one(UUd) - UUd Vproj = one(VdV) - VdV - - # Truncation contribution from dU₂ and dV₂ - function svdlinprob(v) # Left-preconditioned linear problem - Γ1 = v[1] - S⁻¹ * v[2] * Vproj * Ad - Γ2 = v[2] - S⁻¹ * v[1] * Uproj * A - return (Γ1, Γ2) - end - if ΔU isa ZeroTangent && ΔV isa ZeroTangent - m, k, n = size(U, 1), size(U, 2), size(V, 2) - y = (zeros(eltype(A), k * m), zeros(eltype(A), k * n)) - γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) - else - y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj) - γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A)))) - end - ΔA += Uproj * γ[1]' * V + U * γ[2] * Vproj + ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Wrong truncation contribution return ΔA end + +# Computation of F in SVD adjoint, including Lorentzian broadening +function _invert_S²(S::AbstractMatrix{T}, tol::Real, ε=0) where {T<:Real} + F = similar(S) + @inbounds for i in axes(F, 1), j in axes(F, 2) + F[i, j] = if i == j + zero(T) + else + sᵢ, sⱼ = S[i, i], S[j, j] + Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2 + ε > 0 && (Δs = _lorentz_broaden(Δs, ε)) + 1 / Δs + end + end + return F +end + +# Lorentzian broadening for SVD adjoint F-singularities +function _lorentz_broaden(x::Real, ε=1e-12) + x′ = 1 / x + return x′ / (x′^2 + ε) +end \ No newline at end of file diff --git a/test/svdwrap.jl b/test/svdwrap.jl new file mode 100644 index 00000000..3c3a0748 --- /dev/null +++ b/test/svdwrap.jl @@ -0,0 +1,63 @@ +using Test +using Random +using LinearAlgebra +using TensorKit +using KrylovKit +using ChainRulesCore, Zygote +using PEPSKit + +# Gauge-invariant loss function +function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc()) + U, _, V, = svdwrap(A, alg; trunc) + return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n +end + +m, n = 20, 30 +dtype = ComplexF64 +χ = 12 +trunc = truncdim(χ) +# lorentz_broad = 1e-12 +adjoint_tol = 1e-16 +rtol = 1e-9 +r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) +R = TensorMap(randn, space(r)) + +@testset "Non-truncacted SVD" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r) + # l_itersvd, g_itersvd = withgradient( + # A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r + # ) + + @test l_oldsvd ≈ l_fullsvd + # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol + # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol +end + +@testset "Truncated SVD with χ=$χ" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r) + # l_itersvd, g_itersvd = withgradient( + # A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r + # ) + + @test l_oldsvd ≈ l_fullsvd + # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol + # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol +end + +# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin +# l_fullsvd, g_fullsvd = withgradient( +# A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r +# ) +# l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broad), R; trunc), r) +# l_itersvd, g_itersvd = withgradient( +# A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broad), R; trunc), r +# ) + +# @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd +# @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol +# @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol +# end From 78ab1527b74c14e5df0ce3cd8db6e1a17b99ab53 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Thu, 20 Jun 2024 17:46:25 +0200 Subject: [PATCH 07/27] Copy ctmrg.jl from master, add svdalg field to CTMRG, use svdwrap in left_move --- src/algorithms/ctmrg.jl | 653 +++++++++++++++++++++++++--------------- 1 file changed, 403 insertions(+), 250 deletions(-) diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 932ed866..65312e0c 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -1,172 +1,381 @@ -@with_kw struct CTMRG #<: Algorithm +# TODO: add abstract Algorithm type? +""" + struct CTMRG(; trscheme = TensorKit.notrunc(), tol = Defaults.ctmrg_tol, + maxiter = Defaults.ctmrg_maxiter, miniter = Defaults.ctmrg_miniter, + verbosity = 0, fixedspace = false) + +Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS. +The projector bond dimensions are set via `trscheme` which controls the truncation +properties inside of `TensorKit.tsvd`. Each CTMRG run is converged up to `tol` +where the singular value convergence of the corners as well as the norm is checked. +The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`. +Different levels of output information are printed depending on `verbosity` (0, 1 or 2). +Regardless of the truncation scheme, the space can be kept fixed with `fixedspace`. +""" +@kwdef struct CTMRG{S} + tol::Float64 = Defaults.ctmrg_tol + maxiter::Int = Defaults.ctmrg_maxiter + miniter::Int = Defaults.ctmrg_miniter + verbosity::Int = 0 + svdalg::S = FullSVD() trscheme::TruncationScheme = TensorKit.notrunc() - tol::Float64 = Defaults.tol - maxiter::Integer = Defaults.maxiter - miniter::Integer = 4 - verbose::Integer = 0 fixedspace::Bool = false end -@with_kw struct CTMRG2 #<: Algorithm - trscheme::TruncationScheme = TensorKit.notrunc() - tol::Float64 = Defaults.tol - maxiter::Integer = Defaults.maxiter - miniter::Integer = 4 - verbose::Integer = 0 +""" + MPSKit.leading_boundary([envinit], state, alg::CTMRG) + +Contract `state` using CTMRG and return the CTM environment. +Per default, a random initial environment is used. +""" +function MPSKit.leading_boundary(state, alg::CTMRG) + return MPSKit.leading_boundary(CTMRGEnv(state), state, alg) end +function MPSKit.leading_boundary(envinit, state, alg::CTMRG) + normold = 1.0 + CSold = map(x -> tsvd(x; alg=TensorKit.SVD())[2], envinit.corners) + TSold = map(x -> tsvd(x; alg=TensorKit.SVD())[2], envinit.edges) + ϵold = 1.0 + env = deepcopy(envinit) + + for i in 1:(alg.maxiter) + env, ϵ = ctmrg_iter(state, env, alg) # Grow and renormalize in all 4 directions + + # Compute convergence criteria and take max (TODO: How should we handle logging all of this?) + Δϵ = abs((ϵold - ϵ) / ϵold) + normnew = norm(state, env) + Δnorm = abs(normold - normnew) / abs(normold) + CSnew = map(c -> tsvd(c; alg=TensorKit.SVD())[2], env.corners) + ΔCS = maximum(zip(CSold, CSnew)) do (c_old, c_new) + # only compute the difference on the smallest part of the spaces + smallest = infimum(MPSKit._firstspace(c_old), MPSKit._firstspace(c_new)) + e_old = isometry(MPSKit._firstspace(c_old), smallest) + e_new = isometry(MPSKit._firstspace(c_new), smallest) + return norm(e_new' * c_new * e_new - e_old' * c_old * e_old) + end + TSnew = map(t -> tsvd(t; alg=TensorKit.SVD())[2], env.edges) -function MPSKit.leading_boundary(peps::InfinitePEPS, alg::CTMRG, envs=CTMRGEnv(peps)) - return MPSKit.leading_boundary(peps, peps, alg, envs) -end; + ΔTS = maximum(zip(TSold, TSnew)) do (t_old, t_new) + MPSKit._firstspace(t_old) == MPSKit._firstspace(t_new) || + return scalartype(t_old)(Inf) + # TODO: implement when spaces aren't the same + return norm(t_new - t_old) + end -function MPSKit.leading_boundary( - peps_above::InfinitePEPS, - peps_below::InfinitePEPS, - alg::CTMRG, - envs=CTMRGEnv(peps_above, peps_below), -) - err = Inf - iter = 1 - - #for convergence criterium we use the on site contracted boundary - #this convergences, though the value depends on the bond dimension χ - old_norm = 1.0 - new_norm = old_norm - ϵ₁ = 1.0 - while (err > alg.tol && iter <= alg.maxiter) || iter <= alg.miniter - ϵ = 0.0 - for i in 1:4 - envs, ϵ₀ = left_move(peps_above, peps_below, alg, envs) - ϵ = max(ϵ, ϵ₀) - envs = rotate_north(envs, EAST) - peps_above = envs.peps_above - peps_below = envs.peps_below + conv_condition = max(Δnorm, ΔCS, ΔTS) < alg.tol && i > alg.miniter + ignore_derivatives() do # Print verbose info + if alg.verbosity > 1 || (alg.verbosity == 1 && (i == 1 || conv_condition)) + @printf( + "CTMRG iter: %3d norm: %.2e Δnorm: %.2e ΔCS: %.2e ΔTS: %.2e ϵ: %.2e Δϵ: %.2e\n", + i, + abs(normnew), + Δnorm, + ΔCS, + ΔTS, + ϵ, + Δϵ + ) + end + alg.verbosity > 0 && + i == alg.maxiter && + @warn( + "CTMRG reached maximal number of iterations at (Δnorm=$Δnorm, ΔCS=$ΔCS, ΔTS=$ΔTS)" + ) + end + conv_condition && break # Converge if maximal Δ falls below tolerance + + # Update convergence criteria + normold = normnew + CSold = CSnew + TSold = TSnew + ϵold = ϵ + end + + # Do one final iteration that does not change the spaces + alg_fixed = CTMRG(; + alg.trscheme, alg.tol, alg.maxiter, alg.miniter, alg.verbosity, fixedspace=true + ) + env′, = ctmrg_iter(state, env, alg_fixed) + envfix = gauge_fix(env, env′) + check_elementwise_convergence(env, envfix; atol=alg.tol^(3 / 4)) || + @warn "CTMRG did not converge elementwise." + return envfix +end + +""" + gauge_fix(envprev::CTMRGEnv{C,T}, envfinal::CTMRGEnv{C,T}) where {C,T} + +Fix the gauge of `envfinal` based on the previous environment `envprev`. +This assumes that the `envfinal` is the result of one CTMRG iteration on `envprev`. +Given that the CTMRG run is converged, the returned environment will be +element-wise converged to `envprev`. +""" +function gauge_fix(envprev::CTMRGEnv{C,T}, envfinal::CTMRGEnv{C,T}) where {C,T} + # Check if spaces in envprev and envfinal are the same + same_spaces = map(Iterators.product(axes(envfinal.edges)...)) do (dir, r, c) + space(envfinal.edges[dir, r, c]) == space(envprev.edges[dir, r, c]) && + space(envfinal.corners[dir, r, c]) == space(envprev.corners[dir, r, c]) + end + @assert all(same_spaces) "Spaces of envprev and envfinal are not the same" + + # Try the "general" algorithm from https://arxiv.org/abs/2311.11894 + signs = map(Iterators.product(axes(envfinal.edges)...)) do (dir, r, c) + # Gather edge tensors and pretend they're InfiniteMPSs + if dir == NORTH + Tsprev = circshift(envprev.edges[dir, r, :], 1 - c) + Tsfinal = circshift(envfinal.edges[dir, r, :], 1 - c) + elseif dir == EAST + Tsprev = circshift(envprev.edges[dir, :, c], 1 - r) + Tsfinal = circshift(envfinal.edges[dir, :, c], 1 - r) + elseif dir == SOUTH + Tsprev = circshift(reverse(envprev.edges[dir, r, :]), c) + Tsfinal = circshift(reverse(envfinal.edges[dir, r, :]), c) + elseif dir == WEST + Tsprev = circshift(reverse(envprev.edges[dir, :, c]), r) + Tsfinal = circshift(reverse(envfinal.edges[dir, :, c]), r) end - new_norm = contract_ctrmg(envs) - - err = abs(old_norm - new_norm) - dϵ = abs((ϵ₁ - ϵ) / ϵ₁) - @ignore_derivatives alg.verbose > 1 && @printf( - "CTMRG: \titeration: %4d\t\terror: %.2e\t\tnorm: %.10e\t\tϵ: %.2e\t\tdϵ: %.2e\n", - iter, - err, - abs(new_norm), - ϵ, - dϵ + # Random MPS of same bond dimension + M = map(Tsfinal) do t + TensorMap(randn, scalartype(t), codomain(t) ← domain(t)) + end + + # Find right fixed points of mixed transfer matrices + ρinit = TensorMap( + randn, + scalartype(T), + MPSKit._lastspace(Tsfinal[end])' ← MPSKit._lastspace(M[end])', ) + ρprev = eigsolve(TransferMatrix(Tsprev, M), ρinit, 1, :LM)[2][1] + ρfinal = eigsolve(TransferMatrix(Tsfinal, M), ρinit, 1, :LM)[2][1] + + # Decompose and multiply + Up, _, Vp = tsvd(ρprev) + Uf, _, Vf = tsvd(ρfinal) + Qprev = Up * Vp + Qfinal = Uf * Vf + σ = Qprev * Qfinal' - old_norm = new_norm - ϵ₁ = ϵ - iter += 1 + return σ end - #@ignore_derivatives @show iter, new_norm, err - @ignore_derivatives iter > alg.maxiter && - alg.verbose > 0 && - @warn "maxiter $(alg.maxiter) reached: error was $(err)" + cornersfix, edgesfix = fix_relative_phases(envfinal, signs) - return envs + # Fix global phase + cornersgfix = map(zip(envprev.corners, cornersfix)) do (Cprev, Cfix) + φ = dot(Cprev, Cfix) + φ' * Cfix + end + edgesgfix = map(zip(envprev.edges, edgesfix)) do (Tprev, Tfix) + φ = dot(Tprev, Tfix) + φ' * Tfix + end + envfix = CTMRGEnv(cornersgfix, edgesgfix) + + return envfix end -function left_move( - peps_above::InfinitePEPS{PType}, - peps_below::InfinitePEPS{PType}, - alg::CTMRG, - envs::CTMRGEnv, -) where {PType} - corners::typeof(envs.corners) = copy(envs.corners) - edges::typeof(envs.edges) = copy(envs.edges) - - above_projector_type = tensormaptype(spacetype(PType), 1, 3, storagetype(PType)) - below_projector_type = tensormaptype(spacetype(PType), 3, 1, storagetype(PType)) +# Explicit fixing of relative phases (doing this compactly in a loop is annoying) +function fix_relative_phases(envfinal::CTMRGEnv, signs) + C1 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c) + @tensor Cfix[-1; -2] := + signs[WEST, _prev(r, end), c][-1 1] * + envfinal.corners[NORTHWEST, r, c][1; 2] * + conj(signs[NORTH, r, c][-2 2]) + end + T1 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c) + @tensor Tfix[-1 -2 -3; -4] := + signs[NORTH, r, c][-1 1] * + envfinal.edges[NORTH, r, c][1 -2 -3; 2] * + conj(signs[NORTH, r, _next(c, end)][-4 2]) + end + + C2 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c) + @tensor Cfix[-1; -2] := + signs[NORTH, r, _next(c, end)][-1 1] * + envfinal.corners[NORTHEAST, r, c][1; 2] * + conj(signs[EAST, r, c][-2 2]) + end + T2 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c) + @tensor Tfix[-1 -2 -3; -4] := + signs[EAST, r, c][-1 1] * + envfinal.edges[EAST, r, c][1 -2 -3; 2] * + conj(signs[EAST, _next(r, end), c][-4 2]) + end + + C3 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c) + @tensor Cfix[-1; -2] := + signs[EAST, _next(r, end), c][-1 1] * + envfinal.corners[SOUTHEAST, r, c][1; 2] * + conj(signs[SOUTH, r, c][-2 2]) + end + T3 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c) + @tensor Tfix[-1 -2 -3; -4] := + signs[SOUTH, r, c][-1 1] * + envfinal.edges[SOUTH, r, c][1 -2 -3; 2] * + conj(signs[SOUTH, r, _prev(c, end)][-4 2]) + end + + C4 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c) + @tensor Cfix[-1; -2] := + signs[SOUTH, r, _prev(c, end)][-1 1] * + envfinal.corners[SOUTHWEST, r, c][1; 2] * + conj(signs[WEST, r, c][-2 2]) + end + T4 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c) + @tensor Tfix[-1 -2 -3; -4] := + signs[WEST, r, c][-1 1] * + envfinal.edges[WEST, r, c][1 -2 -3; 2] * + conj(signs[WEST, _prev(r, end), c][-4 2]) + end + + return stack([C1, C2, C3, C4]; dims=1), stack([T1, T2, T3, T4]; dims=1) +end + +""" + check_elementwise_convergence(envfinal, envfix; atol=1e-6) + +Check if the element-wise difference of the corner and edge tensors of the final and fixed +CTMRG environments are below some tolerance. +""" +function check_elementwise_convergence( + envfinal::CTMRGEnv, envfix::CTMRGEnv; atol::Real=1e-6 +) + ΔC = envfinal.corners .- envfix.corners + ΔCmax = norm(ΔC, Inf) + ΔCmean = norm(ΔC) + @debug "maxᵢⱼ|Cⁿ⁺¹ - Cⁿ|ᵢⱼ = $ΔCmax mean |Cⁿ⁺¹ - Cⁿ|ᵢⱼ = $ΔCmean" + + ΔT = envfinal.edges .- envfix.edges + ΔTmax = norm(ΔT, Inf) + ΔTmean = norm(ΔT) + @debug "maxᵢⱼ|Tⁿ⁺¹ - Tⁿ|ᵢⱼ = $ΔTmax mean |Tⁿ⁺¹ - Tⁿ|ᵢⱼ = $ΔTmean" + + # Check differences for all tensors in unit cell to debug properly + for (dir, r, c) in Iterators.product(axes(envfinal.edges)...) + @debug( + "$((dir, r, c)): all |Cⁿ⁺¹ - Cⁿ|ᵢⱼ < ϵ: ", + all(x -> abs(x) < atol, convert(Array, ΔC[dir, r, c])), + ) + @debug( + "$((dir, r, c)): all |Tⁿ⁺¹ - Tⁿ|ᵢⱼ < ϵ: ", + all(x -> abs(x) < atol, convert(Array, ΔT[dir, r, c])), + ) + end + + return isapprox(ΔCmax, 0; atol) && isapprox(ΔTmax, 0; atol) +end + +@non_differentiable check_elementwise_convergence(args...) + +""" + ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} + +Perform one iteration of CTMRG that maps the `state` and `env` to a new environment, +and also return the truncation error. +One CTMRG iteration consists of four `left_move` calls and 90 degree rotations, +such that the environment is grown and renormalized in all four directions. +""" +function ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} ϵ = 0.0 - n0 = 1.0 - n1 = 1.0 - for col in 1:size(peps_above, 2) - cop = mod1(col + 1, size(peps_above, 2)) - com = mod1(col - 1, size(peps_above, 2)) - - above_projs = Vector{above_projector_type}(undef, size(peps_above, 1)) - below_projs = Vector{below_projector_type}(undef, size(peps_above, 1)) - - # find all projectors - for row in 1:size(peps_above, 1) - rop = mod1(row + 1, size(peps_above, 1)) - peps_above_nw = peps_above[row, col] - peps_above_sw = rotate_north(peps_above[rop, col], WEST) - peps_below_nw = peps_below[row, col] - peps_below_sw = rotate_north(peps_below[rop, col], WEST) - - Q1 = northwest_corner( - envs.edges[SOUTH, mod1(row + 1, end), col], - envs.corners[SOUTHWEST, mod1(row + 1, end), col], - envs.edges[WEST, mod1(row + 1, end), col], - peps_above_sw, - peps_below_sw, + + for _ in 1:4 + env, _, _, ϵ₀ = left_move(state, env, alg) + state = rotate_north(state, EAST) + env = rotate_north(env, EAST) + ϵ = max(ϵ, ϵ₀) + end + + return env, ϵ +end + +""" + left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} + +Grow, project and renormalize the environment `env` in west direction. +Return the updated environment as well as the projectors and truncation error. +""" +function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} + corners::typeof(env.corners) = copy(env.corners) + edges::typeof(env.edges) = copy(env.edges) + ϵ = 0.0 + Pleft, Pright = Zygote.Buffer.(projector_type(T, size(state))) # Use Zygote.Buffer instead of @diffset to avoid ZeroTangent errors in _setindex + + for col in 1:size(state, 2) + cnext = _next(col, size(state, 2)) + + # Compute projectors + for row in 1:size(state, 1) + rnext = _next(row, size(state, 1)) + state_nw = state[row, col] + state_sw = rotate_north(state[rnext, col], WEST) + + # Enlarged corners + Q_sw = northwest_corner( + env.edges[SOUTH, _next(row, end), col], + env.corners[SOUTHWEST, _next(row, end), col], + env.edges[WEST, _next(row, end), col], + state_sw, ) - Q2 = northwest_corner( - envs.edges[WEST, row, col], - envs.corners[NORTHWEST, row, col], - envs.edges[NORTH, row, col], - peps_above_nw, - peps_below_nw, + Q_nw = northwest_corner( + env.edges[WEST, row, col], + env.corners[NORTHWEST, row, col], + env.edges[NORTH, row, col], + state_nw, ) + # SVD half-infinite environment trscheme = if alg.fixedspace == true - truncspace(space(envs.edges[WEST, row, cop], 1)) + truncspace(space(env.edges[WEST, row, cnext], 1)) else alg.trscheme end - #@ignore_derivatives @show norm(Q1*Q2) - - (U, S, V) = tsvd(Q1 * Q2; trunc=trscheme, alg=SVD()) - - @ignore_derivatives n0 = norm(Q1 * Q2)^2 - @ignore_derivatives n1 = norm(U * S * V)^2 - @ignore_derivatives ϵ = max(ϵ, (n0 - n1) / n0) - - isqS = sdiag_inv_sqrt(S) - #Q = isqS*U'*Q1; - #P = Q2*V'*isqS; - @tensor Q[-1; -2 -3 -4] := isqS[-1; 1] * conj(U[2 3 4; 1]) * Q1[2 3 4; -2 -3 -4] - @tensor P[-1 -2 -3; -4] := Q2[-1 -2 -3; 1 2 3] * conj(V[4; 1 2 3]) * isqS[4; -4] + U, S, V, ϵ_local = svdwrap(Q_sw * Q_nw, alg.svdalg; trunc=trscheme) + ϵ = max(ϵ, ϵ_local / norm(S)) + # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better + + # Compute SVD truncation error and check for degenerate singular values + ignore_derivatives() do + if alg.verbosity > 0 && is_degenerate_spectrum(S) + svals = TensorKit.SectorDict(c => diag(b) for (c, b) in blocks(S)) + @warn("degenerate singular values detected: ", svals) + end + end - @diffset above_projs[row] = Q - @diffset below_projs[row] = P + # Compute projectors + Pl, Pr = build_projectors(U, S, V, Q_sw, Q_nw) + Pleft[row, col] = Pl + Pright[row, col] = Pr end - #use the projectors to grow the corners/edges - for row in 1:size(peps_above, 1) - Q = above_projs[row] - P = below_projs[mod1(row - 1, end)] - rop = mod1(row + 1, size(peps_above, 1)) - rom = mod1(row - 1, size(peps_above, 1)) - - @diffset @tensor corners[NORTHWEST, rop, cop][-1; -2] := - envs.corners[NORTHWEST, rop, col][1, 2] * - envs.edges[NORTH, rop, col][2, 3, 4, -2] * - Q[-1; 1 3 4] - @diffset @tensor corners[SOUTHWEST, rom, cop][-1; -2] := - envs.corners[SOUTHWEST, rom, col][1, 4] * - envs.edges[SOUTH, rom, col][-1, 2, 3, 1] * - P[4 2 3; -2] - @diffset @tensor edges[WEST, row, cop][-1 -2 -3; -4] := - envs.edges[WEST, row, col][1 2 3; 4] * - peps_above[row, col][9; 5 -2 7 2] * - conj(peps_below[row, col][9; 6 -3 8 3]) * - P[4 5 6; -4] * - Q[-1; 1 7 8] + # Use projectors to grow the corners & edges + for row in 1:size(state, 1) + rprev = _prev(row, size(state, 1)) + rnext = _next(row, size(state, 1)) + C_sw, C_nw, T_w = grow_env_left( + state[row, col], + Pleft[_prev(row, end), col], + Pright[row, col], + env.corners[SOUTHWEST, rprev, col], + env.corners[NORTHWEST, rnext, col], + env.edges[SOUTH, rprev, col], + env.edges[WEST, row, col], + env.edges[NORTH, rnext, col], + ) + @diffset corners[SOUTHWEST, rprev, cnext] = C_sw + @diffset corners[NORTHWEST, rnext, cnext] = C_nw + @diffset edges[WEST, row, cnext] = T_w end - @diffset corners[NORTHWEST, :, cop] ./= norm.(corners[NORTHWEST, :, cop]) - @diffset edges[WEST, :, cop] ./= norm.(edges[WEST, :, cop]) - @diffset corners[SOUTHWEST, :, cop] ./= norm.(corners[SOUTHWEST, :, cop]) + @diffset corners[SOUTHWEST, :, cnext] ./= norm.(corners[SOUTHWEST, :, cnext]) + @diffset corners[NORTHWEST, :, cnext] ./= norm.(corners[NORTHWEST, :, cnext]) + @diffset edges[WEST, :, cnext] ./= norm.(edges[WEST, :, cnext]) end - return CTMRGEnv(peps_above, peps_below, corners, edges), ϵ + return CTMRGEnv(corners, edges), copy(Pleft), copy(Pright), ϵ end +# Compute enlarged NW corner function northwest_corner(E4, C1, E1, peps_above, peps_below=peps_above) @tensor corner[-1 -2 -3; -4 -5 -6] := E4[-1 1 2; 3] * @@ -175,6 +384,8 @@ function northwest_corner(E4, C1, E1, peps_above, peps_below=peps_above) peps_above[7; 5 -5 -2 1] * conj(peps_below[7; 6 -6 -3 2]) end + +# Compute enlarged NE corner function northeast_corner(E1, C2, E2, peps_above, peps_below=peps_above) @tensor corner[-1 -2 -3; -4 -5 -6] := E1[-1 1 2; 3] * @@ -183,6 +394,8 @@ function northeast_corner(E1, C2, E2, peps_above, peps_below=peps_above) peps_above[7; 1 5 -5 -2] * conj(peps_below[7; 2 6 -6 -3]) end + +# Compute enlarged SE corner function southeast_corner(E2, C3, E3, peps_above, peps_below=peps_above) @tensor corner[-1 -2 -3; -4 -5 -6] := E2[-1 1 2; 3] * @@ -192,130 +405,70 @@ function southeast_corner(E2, C3, E3, peps_above, peps_below=peps_above) conj(peps_below[7; -3 2 6 -6]) end -#= - -function MPSKit.leading_boundary(peps::InfinitePEPS,alg::CTMRG2,envs = CTMRGEnv(peps)) - err = Inf - iter = 1 - - old_norm = 1.0 - - while (err > alg.tol && iter <= alg.maxiter) || iter<4 - - for dir in 1:4 - envs = left_move(peps,alg,envs); - - envs = rotate_north(envs,EAST); - peps = envs.peps; - end - new_norm = abs(contract_ctrmg(peps,envs,1,1)) - #@show new_norm - err = abs(old_norm-new_norm) - @ignore_derivatives mod(alg.verbose,alg.miniter)==0 && mod(iter,alg.verbose+1)==0 && @info "$(iter) $(err) $(new_norm)" - - old_norm = new_norm - iter += 1 - end - @ignore_derivatives iter > alg.maxiter && @warn "maxiter $(alg.maxiter) reached: error was $(err)" +# Build projectors from SVD and enlarged SW & NW corners +function build_projectors( + U::AbstractTensorMap{E,3,1}, S, V::AbstractTensorMap{E,1,3}, Q_sw, Q_nw +) where {E<:ElementarySpace} + isqS = sdiag_inv_sqrt(S) + @tensor Pl[-1 -2 -3; -4] := Q_nw[-1 -2 -3; 1 2 3] * conj(V[4; 1 2 3]) * isqS[4; -4] + @tensor Pr[-1; -2 -3 -4] := isqS[-1; 1] * conj(U[2 3 4; 1]) * Q_sw[2 3 4; -2 -3 -4] + return Pl, Pr +end - envs +# Apply projectors to entire left half-environment to grow SW & NW corners, and W edge +function grow_env_left(peps, Pl, Pr, C_sw, C_nw, T_s, T_w, T_n) + @tensor C_sw′[-1; -2] := C_sw[1; 4] * T_s[-1 2 3; 1] * Pl[4 2 3; -2] + @tensor C_nw′[-1; -2] := C_nw[1; 2] * T_n[2 3 4; -2] * Pr[-1; 1 3 4] + @tensor T_w′[-1 -2 -3; -4] := + T_w[1 2 3; 4] * + peps[9; 5 -2 7 2] * + conj(peps[9; 6 -3 8 3]) * + Pl[4 5 6; -4] * + Pr[-1; 1 7 8] + return C_sw′, C_nw′, T_w′ end -# the actual left_move is dependent on the type of ctmrg, so this seems natural -function left_move(peps::InfinitePEPS{PType},alg::CTMRG2,envs::CTMRGEnv) where PType - corners::typeof(envs.corners) = copy(envs.corners); - edges::typeof(envs.edges) = copy(envs.edges); - - above_projector_type = tensormaptype(spacetype(PType),1,3,storagetype(PType)); - below_projector_type = tensormaptype(spacetype(PType),3,1,storagetype(PType)); - - for col in 1:size(peps,2) - cop = mod1(col+1, size(peps,2)) - above_projs = Vector{above_projector_type}(undef,size(peps,1)); - below_projs = Vector{below_projector_type}(undef,size(peps,1)); - - # find all projectors - for row in 1:size(peps,1) - rop = mod1(row+1, size(peps,1)) - peps_nw = peps[row,col]; - peps_sw = rotate_north(peps[rop,col],WEST); - - Q1 = northwest_corner(envs.edges[WEST,row,col],envs.corners[NORTHWEST,row,col], envs.edges[NORTH,row,col],peps_nw); - Q2 = northeast_corner(envs.edges[NORTH,row,cop],envs.corners[NORTHEAST,row,cop],envs.edges[EAST,row,cop],peps[row,cop]) - Q3 = southeast_corner(envs.edges[EAST,rop,cop],envs.corners[SOUTHEAST,rop,cop],envs.edges[SOUTH,rop,cop],peps[rop,cop]) - Q4 = northwest_corner(envs.edges[SOUTH,rop,col],envs.corners[SOUTHWEST,rop,col],envs.edges[WEST,rop,col],peps_sw); - Qnorth = Q1*Q2 - Qsouth = Q3*Q4 - (U,S,V) = tsvd(Qsouth*Qnorth, trunc = alg.trscheme); - #@ignore_derivatives @show ϵ = real(norm(Qsouth*Qnorth)^2-norm(U*S*V)^2) - #@ignore_derivatives @info ϵ - isqS = sdiag_inv_sqrt(S); - Q = isqS*U'*Qsouth; - P = Qnorth*V'*isqS; - - @diffset above_projs[row] = Q; - @diffset below_projs[row] = P; - end +@doc """ + LinearAlgebra.norm(peps::InfinitePEPS, env::CTMRGEnv) - #use the projectors to grow the corners/edges - for row in 1:size(peps,1) - Q = above_projs[row]; - P = below_projs[mod1(row-1,end)]; - - @diffset @tensor corners[NORTHWEST,row+1,col+1][-1;-2] := envs.corners[NORTHWEST,row+1,col][1,2] * envs.edges[NORTH,row+1,col][2,3,4,-2]*Q[-1;1 3 4] - @diffset @tensor corners[SOUTHWEST,row-1,col+1][-1;-2] := envs.corners[SOUTHWEST,row-1,col][1,4] * envs.edges[SOUTH,row-1,col][-1,2,3,1]*P[4 2 3;-2] - @diffset @tensor edges[WEST,row,col+1][-1 -2 -3;-4] := envs.edges[WEST,row,col][1 2 3;4]* - peps[row,col][9;5 -2 7 2]* - conj(peps[row,col][9;6 -3 8 3])* - P[4 5 6;-4]* - Q[-1;1 7 8] - end +Compute the norm of a PEPS contracted with a CTM environment. +""" - @diffset corners[NORTHWEST,:,col+1]./=norm.(corners[NORTHWEST,:,col+1]); - @diffset corners[SOUTHWEST,:,col+1]./=norm.(corners[SOUTHWEST,:,col+1]); - @diffset edges[WEST,:,col+1]./=norm.(edges[WEST,:,col+1]); - end +function LinearAlgebra.norm(peps::InfinitePEPS, env::CTMRGEnv) + total = one(scalartype(peps)) - CTMRGEnv(peps,corners,edges); -end -=# + for r in 1:size(peps, 1), c in 1:size(peps, 2) + total *= @tensor env.edges[WEST, r, c][1 2 3; 4] * + env.corners[NORTHWEST, r, c][4; 5] * + env.edges[NORTH, r, c][5 6 7; 8] * + env.corners[NORTHEAST, r, c][8; 9] * + env.edges[EAST, r, c][9 10 11; 12] * + env.corners[SOUTHEAST, r, c][12; 13] * + env.edges[SOUTH, r, c][13 14 15; 16] * + env.corners[SOUTHWEST, r, c][16; 1] * + peps[r, c][17; 6 10 14 2] * + conj(peps[r, c][17; 7 11 15 3]) -function contract_ctrmg( - envs::CTMRGEnv, peps_above=envs.peps_above, peps_below=envs.peps_below -) - total = 1.0 + 0im - - for r in 1:size(peps_above, 1), c in 1:size(peps_above, 2) - total *= @tensor envs.edges[WEST, r, c][1 2 3; 4] * - envs.corners[NORTHWEST, r, c][4; 5] * - envs.edges[NORTH, r, c][5 6 7; 8] * - envs.corners[NORTHEAST, r, c][8; 9] * - envs.edges[EAST, r, c][9 10 11; 12] * - envs.corners[SOUTHEAST, r, c][12; 13] * - envs.edges[SOUTH, r, c][13 14 15; 16] * - envs.corners[SOUTHWEST, r, c][16; 1] * - peps_above[r, c][17; 6 10 14 2] * - conj(peps_below[r, c][17; 7 11 15 3]) total *= tr( - envs.corners[NORTHWEST, r, c] * - envs.corners[NORTHEAST, r, mod1(c - 1, end)] * - envs.corners[SOUTHEAST, mod1(r - 1, end), mod1(c - 1, end)] * - envs.corners[SOUTHWEST, mod1(r - 1, end), c], + env.corners[NORTHWEST, r, c] * + env.corners[NORTHEAST, r, mod1(c - 1, end)] * + env.corners[SOUTHEAST, mod1(r - 1, end), mod1(c - 1, end)] * + env.corners[SOUTHWEST, mod1(r - 1, end), c], ) - total /= @tensor envs.edges[WEST, r, c][1 10 11; 4] * - envs.corners[NORTHWEST, r, c][4; 5] * - envs.corners[NORTHEAST, r, mod1(c - 1, end)][5; 6] * - envs.edges[EAST, r, mod1(c - 1, end)][6 10 11; 7] * - envs.corners[SOUTHEAST, r, mod1(c - 1, end)][7; 8] * - envs.corners[SOUTHWEST, r, c][8; 1] - - total /= @tensor envs.corners[NORTHWEST, r, c][1; 2] * - envs.edges[NORTH, r, c][2 10 11; 3] * - envs.corners[NORTHEAST, r, c][3; 4] * - envs.corners[SOUTHEAST, mod1(r - 1, end), c][4; 5] * - envs.edges[SOUTH, mod1(r - 1, end), c][5 10 11; 6] * - envs.corners[SOUTHWEST, mod1(r - 1, end), c][6; 1] + total /= @tensor env.edges[WEST, r, c][1 10 11; 4] * + env.corners[NORTHWEST, r, c][4; 5] * + env.corners[NORTHEAST, r, mod1(c - 1, end)][5; 6] * + env.edges[EAST, r, mod1(c - 1, end)][6 10 11; 7] * + env.corners[SOUTHEAST, r, mod1(c - 1, end)][7; 8] * + env.corners[SOUTHWEST, r, c][8; 1] + + total /= @tensor env.corners[NORTHWEST, r, c][1; 2] * + env.edges[NORTH, r, c][2 10 11; 3] * + env.corners[NORTHEAST, r, c][3; 4] * + env.corners[SOUTHEAST, mod1(r - 1, end), c][4; 5] * + env.edges[SOUTH, mod1(r - 1, end), c][5 10 11; 6] * + env.corners[SOUTHWEST, mod1(r - 1, end), c][6; 1] end return total From fc6600ecc6d7ef5bd3d3f1f3497e8eb147212d68 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Thu, 4 Jul 2024 18:03:00 +0200 Subject: [PATCH 08/27] Use KrylovKit implementation of eigsolve instead of eigsolve.jl, delete svdwrap wrapper and instead directly use tsvd, add KrylovKit compat entry --- Project.toml | 2 +- src/PEPSKit.jl | 3 +- src/algorithms/ctmrg.jl | 4 +- src/utility/eigsolve.jl | 251 ---------------------- src/utility/svd.jl | 155 +++++-------- test/{svdwrap.jl => ctmrg/svd_wrapper.jl} | 24 +-- test/runtests.jl | 3 + 7 files changed, 72 insertions(+), 370 deletions(-) delete mode 100644 src/utility/eigsolve.jl rename test/{svdwrap.jl => ctmrg/svd_wrapper.jl} (74%) diff --git a/Project.toml b/Project.toml index f8b9f357..98a70c11 100644 --- a/Project.toml +++ b/Project.toml @@ -22,7 +22,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" Accessors = "0.1" ChainRulesCore = "1.0" Compat = "3.46, 4.2" -KrylovKit = "0.6, 0.7, 0.8" +KrylovKit = "0.8" LinearAlgebra = "1" MPSKit = "0.10" OptimKit = "0.3" diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index f0cbc7fb..1ee02e1f 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -10,7 +10,6 @@ using ChainRulesCore, Zygote include("utility/util.jl") include("utility/svd.jl") -include("utility/eigsolve.jl") include("utility/rotations.jl") include("utility/hook_pullback.jl") @@ -59,7 +58,7 @@ module Defaults const fpgrad_tol = 1e-6 end -export FullSVD, IterSVD, OldSVD, svdwrap, itersvd +export FullSVD, IterSVD, OldSVD export CTMRG, CTMRGEnv export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor export expectation_value, costfun diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index a69d6da9..70cbe133 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -17,7 +17,7 @@ Regardless of the truncation scheme, the space can be kept fixed with `fixedspac maxiter::Int = Defaults.ctmrg_maxiter miniter::Int = Defaults.ctmrg_miniter verbosity::Int = 0 - svdalg::S = FullSVD() + svdalg::S = TensorKit.SVD() trscheme::TruncationScheme = TensorKit.notrunc() fixedspace::Bool = false end @@ -336,7 +336,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} alg.trscheme end @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6] - U, S, V, ϵ_local = svdwrap(QQ, alg.svdalg; trunc=trscheme) + U, S, V, ϵ_local = tsvd(QQ; trunc=trscheme, alg=alg.svdalg) ϵ = max(ϵ, ϵ_local / norm(S)) # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better diff --git a/src/utility/eigsolve.jl b/src/utility/eigsolve.jl deleted file mode 100644 index 975adf49..00000000 --- a/src/utility/eigsolve.jl +++ /dev/null @@ -1,251 +0,0 @@ -# Copied from Jutho/KrylovKit.jl/pull/56, with minor tweaks - -function ChainRulesCore.rrule( - ::typeof(eigsolve), A::AbstractMatrix, x₀, howmany, which, alg -) - (vals, vecs, info) = eigsolve(A, x₀, howmany, which, alg) - project_A = ProjectTo(A) - T = eltype(vecs[1]) # will be real for real symmetric problems and complex otherwise - - function eigsolve_pullback(ΔX) - _Δvals = unthunk(ΔX[1]) - _Δvecs = unthunk(ΔX[2]) - - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂howmany = NoTangent() - ∂which = NoTangent() - ∂alg = NoTangent() - if _Δvals isa AbstractZero && _Δvecs isa AbstractZero - ∂A = ZeroTangent() - return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg - end - - if _Δvals isa AbstractZero - Δvals = fill(NoTangent(), length(Δvecs)) - else - Δvals = _Δvals - end - if _Δvecs isa AbstractZero - Δvecs = fill(NoTangent(), length(Δvals)) - else - Δvecs = _Δvecs - end - - @assert length(Δvals) == length(Δvecs) - @assert length(Δvals) <= length(vals) - - # Determine algorithm to solve linear problem - # TODO: Is there a better choice? Should we make this user configurable? - linalg = GMRES(; - tol=alg.tol, krylovdim=alg.krylovdim, maxiter=alg.maxiter, orth=alg.orth - ) - - ws = similar(vecs, length(Δvecs)) - for i in 1:length(Δvecs) - Δλ = Δvals[i] - Δv = Δvecs[i] - λ = vals[i] - v = vecs[i] - - # First threat special cases - if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution - ws[i] = Δv # some kind of zero - continue - end - if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution - ws[i] = Δλ * v - continue - end - - # General case : - if isa(Δv, AbstractZero) - b = RecursiveVec(zero(T) * v, T[Δλ]) - else - @assert isa(Δv, typeof(v)) - b = RecursiveVec(Δv, T[Δλ]) - end - - if i > 1 && - eltype(A) <: Real && - vals[i] == conj(vals[i - 1]) && - Δvals[i] == conj(Δvals[i - 1]) && - vecs[i] == conj(vecs[i - 1]) && - Δvecs[i] == conj(Δvecs[i - 1]) - ws[i] = conj(ws[i - 1]) - continue - end - - w, reverse_info = let λ = λ, v = v, Aᴴ = A' - linsolve(b, zero(T) * b, linalg) do x - x1, x2 = x - γ = 1 - # γ can be chosen freely and does not affect the solution theoretically - # The current choice guarantees that the extended matrix is Hermitian if A is - # TODO: is this the best choice in all cases? - y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, A' * x1)) - y2 = T[-dot(v, x1)] - return RecursiveVec(y1, y2) - end - end - if info.converged >= i && reverse_info.converged == 0 - @warn "The cotangent linear problem did not converge, whereas the primal eigenvalue problem did." - end - ws[i] = w[1] - end - - if A isa StridedMatrix - ∂A = InplaceableThunk( - Ā -> _buildĀ!(Ā, ws, vecs), @thunk(_buildĀ!(zero(A), ws, vecs)) - ) - else - ∂A = @thunk(project_A(_buildĀ!(zero(A), ws, vecs))) - end - return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg - end - return (vals, vecs, info), eigsolve_pullback -end - -function _buildĀ!(Ā, ws, vs) - for i in 1:length(ws) - w = ws[i] - v = vs[i] - if !(w isa AbstractZero) - if eltype(Ā) <: Real && eltype(w) <: Complex - mul!(Ā, _realview(w), _realview(v)', -1, 1) - mul!(Ā, _imagview(w), _imagview(v)', -1, 1) - else - mul!(Ā, w, v', -1, 1) - end - end - end - return Ā -end -function _realview(v::AbstractVector{Complex{T}}) where {T} - return view(reinterpret(T, v), 2 * (1:length(v)) .- 1) -end -function _imagview(v::AbstractVector{Complex{T}}) where {T} - return view(reinterpret(T, v), 2 * (1:length(v))) -end - -function ChainRulesCore.rrule( - config::RuleConfig{>:HasReverseMode}, - ::typeof(eigsolve), - A::AbstractMatrix, - x₀, - howmany, - which, - alg, -) - return ChainRulesCore.rrule(eigsolve, A, x₀, howmany, which, alg) -end - -function ChainRulesCore.rrule( - config::RuleConfig{>:HasReverseMode}, ::typeof(eigsolve), f, x₀, howmany, which, alg -) - (vals, vecs, info) = eigsolve(f, x₀, howmany, which, alg) - resize!(vecs, howmany) - resize!(vals, howmany) - T = typeof(dot(vecs[1], vecs[1])) - f_pullbacks = map(x -> rrule_via_ad(config, f, x)[2], vecs) - function eigsolve_pullback(ΔX) - _Δvals = unthunk(ΔX[1]) - _Δvecs = unthunk(ΔX[2]) - - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂howmany = NoTangent() - ∂which = NoTangent() - ∂alg = NoTangent() - if _Δvals isa AbstractZero && _Δvecs isa AbstractZero - ∂A = ZeroTangent() - return (∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg) - end - - if _Δvals isa AbstractZero - Δvals = fill(NoTangent(), howmany) - else - Δvals = _Δvals - end - if _Δvecs isa AbstractZero - Δvecs = fill(NoTangent(), howmany) - else - Δvecs = _Δvecs - end - - # filter ZeroTangents, added compared to Jutho/KrylovKit.jl/pull/56 - Δvecs = filter(x -> !(x isa AbstractZero), Δvecs) - @assert length(Δvals) == length(Δvecs) - - # Determine algorithm to solve linear problem - # TODO: Is there a better choice? Should we make this user configurable? - linalg = GMRES(; - tol=alg.tol, - krylovdim=alg.krylovdim + 10, - maxiter=alg.maxiter * 10, - orth=alg.orth, - ) - # linalg = BiCGStab(; - # tol = alg.tol, - # maxiter = alg.maxiter*alg.krylovdim, - # ) - - ws = similar(Δvecs) - for i in 1:length(Δvecs) - Δλ = Δvals[i] - Δv = Δvecs[i] - λ = vals[i] - v = vecs[i] - - # First threat special cases - if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution - ws[i] = 0 * v # some kind of zero - continue - end - if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution - ws[i] = Δλ * v - continue - end - - # General case : - if isa(Δv, AbstractZero) - b = RecursiveVec(zero(T) * v, T[-Δλ]) - else - @assert isa(Δv, typeof(v)) - b = RecursiveVec(-Δv, T[-Δλ]) - end - - # TODO: is there any analogy to this for general vector-like user types - # if i > 1 && eltype(A) <: Real && - # vals[i] == conj(vals[i-1]) && Δvals[i] == conj(Δvals[i-1]) && - # vecs[i] == conj(vecs[i-1]) && Δvecs[i] == conj(Δvecs[i-1]) - # - # ws[i] = conj(ws[i-1]) - # continue - # end - - w, reverse_info = let λ = λ, v = v, fᴴ = x -> f_pullbacks[i](x)[2] - linsolve(b, zero(T) * b, linalg) do x - x1, x2 = x - γ = 1 - # γ can be chosen freely and does not affect the solution theoretically - # The current choice guarantees that the extended matrix is Hermitian if A is - # TODO: is this the best choice in all cases? - y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, fᴴ(x1))) - y2 = T[-dot(v, x1)] - return RecursiveVec(y1, y2) - end - end - if info.converged >= i && reverse_info.converged == 0 - @warn "The cotangent linear problem ($i) did not converge, whereas the primal eigenvalue problem did." reverse_info b - end - ws[i] = w[1] - end - ∂f = f_pullbacks[1](ws[1])[1] - for i in 2:length(ws) - ∂f = VectorInterface.add!!(∂f, f_pullbacks[i](ws[i])[1]) - end - return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg - end - return (vals, vecs, info), eigsolve_pullback -end diff --git a/src/utility/svd.jl b/src/utility/svd.jl index d06b27c8..ebcc9784 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -1,108 +1,56 @@ -import TensorKit: - SectorDict, - _empty_svdtensors, - _compute_svddata!, - _truncate!, - _implement_svdtruncation!, - _create_svdtensors - -# Plain copy of tsvd!(...) from TensorKit to lift alg type restriction -function _tensorkit_svd!( - t::TensorMap; - trunc::TruncationScheme=TensorKit.NoTruncation(), - p::Real=2, - alg=TensorKit.SVD(), -) - #early return - if isempty(blocksectors(t)) - truncerr = zero(real(scalartype(t))) - return _empty_svdtensors(t)..., truncerr - end - - S = spacetype(t) - Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg) - if !isa(trunc, TensorKit.NoTruncation) - Σdata, truncerr = _truncate!(Σdata, trunc, p) - Udata, Σdata, Vdata, dims = _implement_svdtruncation!(t, Udata, Σdata, Vdata, dims) - W = S(dims) - else - truncerr = abs(zero(scalartype(t))) - W = S(dims) - if length(domain(t)) == 1 && domain(t)[1] ≅ W - W = domain(t)[1] - elseif length(codomain(t)) == 1 && codomain(t)[1] ≅ W - W = codomain(t)[1] - end - end - return _create_svdtensors(t, Udata, Σdata, Vdata, W)..., truncerr -end - -# Wrapper struct around TensorKit's SVD algorithms -@kwdef struct FullSVD - alg::Union{<:TensorKit.SVD,<:TensorKit.SDD} = TensorKit.SVD() - lorentz_broad::Float64 = 0.0 -end - -function svdwrap(t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...) - # TODO: Replace _tensorkit_svd! with just tsvd eventually to use the full TensorKit machinery - return _tensorkit_svd!(copy(t); trunc, alg.alg) -end - -function ChainRulesCore.rrule( - ::typeof(svdwrap), t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs... -) - tsvd_return, tsvd!_pullback = ChainRulesCore.rrule(tsvd!, t; alg=TensorKit.SVD(), trunc) - function svdwrap_fullsvd_pullback(Δ) - return tsvd!_pullback(Δ)..., NoTangent() - end - return tsvd_return, svdwrap_fullsvd_pullback -end +using TensorKit: SectorDict # Wrapper around Krylov Kit's GKL iterative SVD solver @kwdef struct IterSVD alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25) - howmany::Int = 20 lorentz_broad::Float64 = 0.0 alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14) end -function svdwrap(t::AbstractTensorMap, alg::IterSVD; trunc=notrunc(), kwargs...) - U, S, V, = _tensorkit_svd!(copy(t); trunc, alg) # TODO: Also replace this with tsvd eventually - ϵ = norm(t - U * S * V) # Compute truncation error separately - return U, S, V, ϵ +# Compute SVD data block-wise using KrylovKit algorithm +function TensorKit._tsvd!(t, alg::IterSVD, trunc::TruncationScheme, p::Real=2) + # TODO end -# Compute SVD data block-wise using KrylovKit algorithm -function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD) - InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) - I = sectortype(t) - A = storagetype(t) - Udata = SectorDict{I,A}() - Vdata = SectorDict{I,A}() - dims = SectorDict{I,Int}() - local Σdata - for (c, b) in blocks(t) - x₀ = randn(eltype(b), size(b, 1)) - Σ, lvecs, rvecs, info = svdsolve(b, x₀, alg.howmany, :LR, alg.alg) - if info.converged < alg.howmany # Fall back to dense SVD if not properly converged - U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) - Udata[c] = U - Vdata[c] = V - else - Udata[c] = stack(lvecs) - Vdata[c] = stack(rvecs)' - end - if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction - Σdata[c] = Σ - else - Σdata = SectorDict(c => Σ) - end - dims[c] = length(Σ) - end - return Udata, Σdata, Vdata, dims +# function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD) +# InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) +# I = sectortype(t) +# A = storagetype(t) +# Udata = SectorDict{I,A}() +# Vdata = SectorDict{I,A}() +# dims = SectorDict{I,Int}() +# local Σdata +# for (c, b) in blocks(t) +# x₀ = randn(eltype(b), size(b, 1)) +# Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, alg.howmany, :LR, alg.alg) +# if info.converged < alg.howmany # Fall back to dense SVD if not properly converged +# U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) +# Udata[c] = U +# Vdata[c] = V +# else +# Udata[c] = stack(lvecs) +# Vdata[c] = stack(rvecs)' +# end +# if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction +# Σdata[c] = Σ +# else +# Σdata = SectorDict(c => Σ) +# end +# dims[c] = length(Σ) +# end +# return Udata, Σdata, Vdata, dims +# end + +function ChainRulesCore.rrule( + ::typeof(TensorKit.tsvd), + t::AbstractTensorMap; + trunc::TruncationScheme=notrunc(), + p::Real=2, + alg::IterSVD=IterSVD(), +) + # TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint end -# TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint # Full SVD with old adjoint that doesn't account for truncation properly @kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}} @@ -110,17 +58,22 @@ end lorentz_broad::Float64 = 0.0 end -function svdwrap(t::AbstractTensorMap, alg::OldSVD; kwargs...) - return svdwrap(t, alg.alg; kwargs...) +# Perform TensorKit.SVD in forward pass +function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2) + return TensorKit._tsvd(t, TensorKit.SVD(), trunc, p) end -# Outdated adjoint not taking truncated part into account for testing purposes +# Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( - ::typeof(svdwrap), t::AbstractTensorMap, alg::OldSVD; kwargs... + ::typeof(TensorKit.tsvd), + t::AbstractTensorMap; + trunc::TruncationScheme=notrunc(), + p::Real=2, + alg::OldSVD=OldSVD(), ) - U, S, V, ϵ = svdwrap(t, alg; kwargs...) + U, S, V, ϵ = tsvd(t; trunc, p, alg) - function svdwrap_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) ∂t = similar(t) for (c, b) in blocks(∂t) copyto!( @@ -139,7 +92,7 @@ function ChainRulesCore.rrule( return NoTangent(), ∂t, NoTangent() end - return (U, S, V, ϵ), svdwrap_oldsvd_pullback + return (U, S, V, ϵ), tsvd_oldsvd_pullback end function oldsvd_rev( @@ -204,4 +157,4 @@ end function _lorentz_broaden(x::Real, ε=1e-12) x′ = 1 / x return x′ / (x′^2 + ε) -end \ No newline at end of file +end diff --git a/test/svdwrap.jl b/test/ctmrg/svd_wrapper.jl similarity index 74% rename from test/svdwrap.jl rename to test/ctmrg/svd_wrapper.jl index 3c3a0748..ab0989df 100644 --- a/test/svdwrap.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -8,7 +8,7 @@ using PEPSKit # Gauge-invariant loss function function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc()) - U, _, V, = svdwrap(A, alg; trunc) + U, _, V, = tsvd(A; trunc, alg) return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n end @@ -25,27 +25,25 @@ R = TensorMap(randn, space(r)) @testset "Non-truncacted SVD" begin l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r) l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r) - # l_itersvd, g_itersvd = withgradient( - # A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r - # ) + l_itersvd, g_itersvd = withgradient( + A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r + ) - @test l_oldsvd ≈ l_fullsvd - # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol - # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol + @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol end @testset "Truncated SVD with χ=$χ" begin l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r) l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r) - # l_itersvd, g_itersvd = withgradient( - # A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r - # ) + l_itersvd, g_itersvd = withgradient( + A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r + ) - @test l_oldsvd ≈ l_fullsvd - # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol - # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol + @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol end # @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin diff --git a/test/runtests.jl b/test/runtests.jl index af2023e1..33e2fc42 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,9 @@ const GROUP = get(ENV, "GROUP", "All") @time @safetestset "Gradients" begin include("ctmrg/gradients.jl") end + @time @safetestset "Gradients" begin + include("ctmrg/svd_wrapper.jl") + end end if GROUP == "All" || GROUP == "MPS" @time @safetestset "VUMPS" begin From 823f52ef331e761e5831a79d3a1b30882d6b9e5f Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Fri, 5 Jul 2024 12:18:30 +0200 Subject: [PATCH 09/27] Add IterSVD _tsvd! method and adjoint using KrylovKit.svdsolve adjoint --- src/utility/svd.jl | 131 +++++++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 39 deletions(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index ebcc9784..6995fc94 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -8,53 +8,106 @@ using TensorKit: SectorDict end # Compute SVD data block-wise using KrylovKit algorithm -function TensorKit._tsvd!(t, alg::IterSVD, trunc::TruncationScheme, p::Real=2) - # TODO -end +function _tsvd!( + t, + alg::IterSVD, + trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace}, + p::Real=2, +) + # early return + if isempty(blocksectors(t)) + truncerr = zero(real(scalartype(t))) + return _empty_svdtensors(t)..., truncerr + end + + Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg, trunc) + U, S, V = _create_svdtensors(t, Udata, Σdata, Vdata, spacetype(t)(dims)) + truncerr = trunc isa NoTruncation ? abs(zero(scalartype(t))) : norm(U * S * V - t, p) -# function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD) -# InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) -# I = sectortype(t) -# A = storagetype(t) -# Udata = SectorDict{I,A}() -# Vdata = SectorDict{I,A}() -# dims = SectorDict{I,Int}() -# local Σdata -# for (c, b) in blocks(t) -# x₀ = randn(eltype(b), size(b, 1)) -# Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, alg.howmany, :LR, alg.alg) -# if info.converged < alg.howmany # Fall back to dense SVD if not properly converged -# U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) -# Udata[c] = U -# Vdata[c] = V -# else -# Udata[c] = stack(lvecs) -# Vdata[c] = stack(rvecs)' -# end -# if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction -# Σdata[c] = Σ -# else -# Σdata = SectorDict(c => Σ) -# end -# dims[c] = length(Σ) -# end -# return Udata, Σdata, Vdata, dims -# end + return U, S, V, truncerr +end +function TensorKit._compute_svddata!( + t::TensorMap, + alg::IterSVD, + trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace}, +) + InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) + I = sectortype(t) + A = storagetype(t) + Udata = SectorDict{I,A}() + Vdata = SectorDict{I,A}() + dims = SectorDict{I,Int}() + local Σdata + for (c, b) in blocks(t) + x₀ = randn(eltype(b), size(b, 1)) + howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c) + Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) + if info.converged < howmany # Fall back to dense SVD if not properly converged + U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + Udata[c] = U + Vdata[c] = V + else + Udata[c] = stack(lvecs) + Vdata[c] = stack(rvecs)' + end + if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction + Σdata[c] = Σ + else + Σdata = SectorDict(c => Σ) + end + dims[c] = length(Σ) + end + return Udata, Σdata, Vdata, dims +end +# IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block function ChainRulesCore.rrule( - ::typeof(TensorKit.tsvd), + ::typeof(TensorKit.tsvd!), t::AbstractTensorMap; trunc::TruncationScheme=notrunc(), p::Real=2, alg::IterSVD=IterSVD(), ) - # TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint -end + U, S, V, ϵ = tsvd(t; trunc, p, alg) + function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + Δt = similar(t) + for (c, b) in blocks(Δt) + Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) + ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) + Sdc = view(Sc, diagind(Sc)) + ΔSdc = (ΔSc isa AbstractZero) ? ΔSc : view(ΔSc, diagind(ΔSc)) + + lvecs = eachcol(Uc) + rvecs = eachrow(Vc) + minimal_info = KrylovKit.ConvergenceInfo(length(Sdc), nothing, nothing, -1, -1) # Just supply converged to SVD pullback + xs, ys = compute_svdsolve_pullback_data( + ΔSdc, + eachcol(ΔUc), + eachrow(ΔVc), + Sdc, + lvecs, + rvecs, + minimal_info, + b, + :LR, + alg.alg, + alg.alg_rrule, + ) + copyto!(b, construct∂f_svd(config, b, lvecs, rvecs, xs, ys)) + end + return NoTangent(), Δt + end + function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + return NoTangent(), ZeroTangent() + end + + return (U, S, V, ϵ), tsvd!_itersvd_pullback +end # Full SVD with old adjoint that doesn't account for truncation properly -@kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}} - alg::A = FullSVD() +@kwdef struct OldSVD{A<:Union{TensorKit.SDD,TensorKit.SVD,IterSVD}} + alg::A = TensorKit.SVD() lorentz_broad::Float64 = 0.0 end @@ -65,7 +118,7 @@ end # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( - ::typeof(TensorKit.tsvd), + ::typeof(TensorKit.tsvd!), t::AbstractTensorMap; trunc::TruncationScheme=notrunc(), p::Real=2, @@ -73,7 +126,7 @@ function ChainRulesCore.rrule( ) U, S, V, ϵ = tsvd(t; trunc, p, alg) - function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd!_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) ∂t = similar(t) for (c, b) in blocks(∂t) copyto!( @@ -92,7 +145,7 @@ function ChainRulesCore.rrule( return NoTangent(), ∂t, NoTangent() end - return (U, S, V, ϵ), tsvd_oldsvd_pullback + return (U, S, V, ϵ), tsvd!_oldsvd_pullback end function oldsvd_rev( From a615b4a22744093e594acb842ae650669444b27f Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Fri, 5 Jul 2024 16:44:21 +0200 Subject: [PATCH 10/27] Add PEPSKit.tsvd wrapper, fix IterSVD adjoint --- src/algorithms/ctmrg.jl | 2 +- src/utility/svd.jl | 133 +++++++++++++++++++++----------------- test/ctmrg/svd_wrapper.jl | 23 +++---- 3 files changed, 88 insertions(+), 70 deletions(-) diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 70cbe133..5b4d65dc 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -336,7 +336,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} alg.trscheme end @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6] - U, S, V, ϵ_local = tsvd(QQ; trunc=trscheme, alg=alg.svdalg) + U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svdalg; trunc=trscheme) ϵ = max(ϵ, ϵ_local / norm(S)) # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 6995fc94..1dae6534 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -1,4 +1,19 @@ -using TensorKit: SectorDict +using TensorKit: + SectorDict, + _tsvd!, + _empty_svdtensors, + _compute_svddata!, + _create_svdtensors, + NoTruncation, + TruncationSpace +CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) + +# SVD wrapper for TensorKit.tsvd that dispatches on the alg type +function PEPSKit.tsvd( + t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2 +) + return TensorKit.tsvd(t; alg, trunc, p) +end # Wrapper around Krylov Kit's GKL iterative SVD solver @kwdef struct IterSVD @@ -8,11 +23,8 @@ using TensorKit: SectorDict end # Compute SVD data block-wise using KrylovKit algorithm -function _tsvd!( - t, - alg::IterSVD, - trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace}, - p::Real=2, +function TensorKit._tsvd!( + t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 ) # early return if isempty(blocksectors(t)) @@ -27,9 +39,7 @@ function _tsvd!( return U, S, V, truncerr end function TensorKit._compute_svddata!( - t::TensorMap, - alg::IterSVD, - trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace}, + t::TensorMap, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace} ) InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) I = sectortype(t) @@ -37,72 +47,83 @@ function TensorKit._compute_svddata!( Udata = SectorDict{I,A}() Vdata = SectorDict{I,A}() dims = SectorDict{I,Int}() - local Σdata + local Sdata for (c, b) in blocks(t) x₀ = randn(eltype(b), size(b, 1)) howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c) - Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) + S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) if info.converged < howmany # Fall back to dense SVD if not properly converged - U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) Udata[c] = U Vdata[c] = V - else - Udata[c] = stack(lvecs) - Vdata[c] = stack(rvecs)' + else # Slice in case more values were converged than requested + Udata[c] = stack(lvecs[1:howmany]) + Vdata[c] = stack(rvecs[1:howmany])' + S = S[1:howmany] end - if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction - Σdata[c] = Σ + if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction + Sdata[c] = S else - Σdata = SectorDict(c => Σ) + Sdata = SectorDict(c => S) end - dims[c] = length(Σ) + dims[c] = length(S) end - return Udata, Σdata, Vdata, dims + return Udata, Sdata, Vdata, dims end # IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block function ChainRulesCore.rrule( - ::typeof(TensorKit.tsvd!), - t::AbstractTensorMap; + ::typeof(PEPSKit.tsvd), + t::AbstractTensorMap, + alg::IterSVD; trunc::TruncationScheme=notrunc(), p::Real=2, - alg::IterSVD=IterSVD(), ) - U, S, V, ϵ = tsvd(t; trunc, p, alg) + U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) Sdc = view(Sc, diagind(Sc)) - ΔSdc = (ΔSc isa AbstractZero) ? ΔSc : view(ΔSc, diagind(ΔSc)) + ΔSdc = ΔSc isa AbstractZero ? ΔSc : view(ΔSc, diagind(ΔSc)) - lvecs = eachcol(Uc) - rvecs = eachrow(Vc) + n_vals = length(Sdc) + lvecs = Vector{Vector{scalartype(t)}}(eachcol(Uc)) + rvecs = Vector{Vector{scalartype(t)}}(eachcol(Vc')) minimal_info = KrylovKit.ConvergenceInfo(length(Sdc), nothing, nothing, -1, -1) # Just supply converged to SVD pullback - xs, ys = compute_svdsolve_pullback_data( - ΔSdc, - eachcol(ΔUc), - eachrow(ΔVc), + + if ΔUc isa AbstractZero && ΔVc isa AbstractZero # Handle ZeroTangent singular vectors + Δlvecs = fill(ZeroTangent(), n_vals) + Δrvecs = fill(ZeroTangent(), n_vals) + else + Δlvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔUc)) + Δrvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔVc')) + end + + xs, ys = CRCExt.compute_svdsolve_pullback_data( + ΔSc isa AbstractZero ? fill(zero(Sc[1]), n_vals) : ΔSdc, + Δlvecs, + Δrvecs, Sdc, lvecs, rvecs, minimal_info, - b, + block(t, c), :LR, alg.alg, alg.alg_rrule, ) - copyto!(b, construct∂f_svd(config, b, lvecs, rvecs, xs, ys)) + copyto!(b, CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys)) end - return NoTangent(), Δt + return NoTangent(), Δt, NoTangent() end - function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) - return NoTangent(), ZeroTangent() + function tsvd_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd!_itersvd_pullback + return (U, S, V, ϵ), tsvd_itersvd_pullback end # Full SVD with old adjoint that doesn't account for truncation properly @@ -113,39 +134,35 @@ end # Perform TensorKit.SVD in forward pass function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2) - return TensorKit._tsvd(t, TensorKit.SVD(), trunc, p) + return _tsvd!(t, TensorKit.SVD(), trunc, p) end # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( - ::typeof(TensorKit.tsvd!), - t::AbstractTensorMap; + ::typeof(PEPSKit.tsvd), + t::AbstractTensorMap, + alg::OldSVD; trunc::TruncationScheme=notrunc(), p::Real=2, - alg::OldSVD=OldSVD(), ) - U, S, V, ϵ = tsvd(t; trunc, p, alg) + U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd!_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) - ∂t = similar(t) - for (c, b) in blocks(∂t) + function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + Δt = similar(t) + for (c, b) in blocks(Δt) + Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) + ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) copyto!( - b, - oldsvd_rev( - block(U, c), - block(S, c), - block(V, c), - block(ΔU, c), - block(ΔS, c), - block(ΔV, c); - lorentz_broad=alg.lorentz_broad, - ), + b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broad=alg.lorentz_broad) ) end - return NoTangent(), ∂t, NoTangent() + return NoTangent(), Δt, NoTangent() + end + function tsvd_oldsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd!_oldsvd_pullback + return (U, S, V, ϵ), tsvd_oldsvd_pullback end function oldsvd_rev( diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index ab0989df..c725a6b1 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -7,45 +7,46 @@ using ChainRulesCore, Zygote using PEPSKit # Gauge-invariant loss function -function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc()) - U, _, V, = tsvd(A; trunc, alg) +function lossfun(A, alg, R=TensorMap(randn, space(A)), trunc=notrunc()) + U, _, V, = PEPSKit.tsvd(A, alg; trunc) return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n end m, n = 20, 30 dtype = ComplexF64 χ = 12 -trunc = truncdim(χ) +trunc = truncspace(ℂ^χ) # lorentz_broad = 1e-12 -adjoint_tol = 1e-16 +adjoint_tol = 1e-14 # Don't make this too small, g_itersvd will be weird rtol = 1e-9 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) R = TensorMap(randn, space(r)) @testset "Non-truncacted SVD" begin - l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r) + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r) l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r) l_itersvd, g_itersvd = withgradient( - A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r + A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, truncspace(ℂ^min(m, n))), r ) - @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol end @testset "Truncated SVD with χ=$χ" begin - l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r) - l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r) + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R, trunc), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R, trunc), r) l_itersvd, g_itersvd = withgradient( - A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r + A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, trunc), r ) - @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol end +# TODO: Add when Lorentzian broadening is implemented # @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin # l_fullsvd, g_fullsvd = withgradient( # A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r From a815e41846e7aa80f4a9f69ed559afc675c9ed10 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Fri, 5 Jul 2024 16:48:37 +0200 Subject: [PATCH 11/27] Add TensorKit compat entry for softened tsvd type restrictions --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 98a70c11..95074716 100644 --- a/Project.toml +++ b/Project.toml @@ -28,7 +28,7 @@ MPSKit = "0.10" OptimKit = "0.3" Printf = "1" Statistics = "1" -TensorKit = "0.12" +TensorKit = "0.12.5" VectorInterface = "0.4" Zygote = "0.6" julia = "1.6" From 447bfd83fece742475044c513ac8ceb91142e897 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Fri, 5 Jul 2024 17:40:23 +0200 Subject: [PATCH 12/27] Add ProjectorAlg and refactor all tests and examples --- examples/boundary_mps.jl | 8 ++++-- examples/heisenberg.jl | 11 ++++---- src/PEPSKit.jl | 2 +- src/algorithms/ctmrg.jl | 59 ++++++++++++++++++++++++++++----------- src/utility/svd.jl | 14 ++++++++-- test/boundarymps/vumps.jl | 8 ++++-- test/ctmrg/gaugefix.jl | 16 +++++------ test/ctmrg/gradients.jl | 5 ++-- test/ctmrg/gradparts.jl | 5 ++-- test/ctmrg/svd_wrapper.jl | 2 +- test/heisenberg.jl | 3 +- test/pwave.jl | 5 ++-- 12 files changed, 89 insertions(+), 49 deletions(-) diff --git a/examples/boundary_mps.jl b/examples/boundary_mps.jl index abd52e17..558b0de2 100644 --- a/examples/boundary_mps.jl +++ b/examples/boundary_mps.jl @@ -33,7 +33,9 @@ N = abs(prod(expectation_value(mps, T))) # This can be compared to the result obtained using the CTMRG algorithm ctm = leading_boundary( - peps, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps; Venv=ComplexSpace(20)) + peps, + CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), + CTMRGEnv(peps; Venv=ComplexSpace(20)), ) N´ = abs(norm(peps, ctm)) @@ -56,7 +58,9 @@ mps2, envs2, ϵ = leading_boundary(mps2, T2, VUMPS()) N2 = abs(prod(expectation_value(mps2, T2))) ctm2 = leading_boundary( - peps2, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps2; Venv=ComplexSpace(20)) + peps2, + CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), + CTMRGEnv(peps2; Venv=ComplexSpace(20)), ) N2´ = abs(norm(peps2, ctm2)) diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl index 0cfd8c06..65cf65e5 100644 --- a/examples/heisenberg.jl +++ b/examples/heisenberg.jl @@ -11,9 +11,10 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # Parameters χbond = 2 χenv = 20 -ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1) -alg = PEPSOptimize(; - boundary_alg=ctmalg, +projector_alg = ProjectorAlg(; trscheme=truncdim(χenv)) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg) +opt_alg = PEPSOptimize(; + boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), gradient_alg=GMRES(; tol=1e-6, maxiter=100), reuse_env=true, @@ -27,6 +28,6 @@ alg = PEPSOptimize(; # E/N = −0.6694421, which is a QMC estimate from https://arxiv.org/abs/1101.3281. # Of course there is a noticable bias for small χbond and χenv. ψ₀ = InfinitePEPS(2, χbond) -env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctmalg) -result = fixedpoint(ψ₀, H, alg, env₀) +env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctm_alg) +result = fixedpoint(ψ₀, H, opt_alg, env₀) @show result.E diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 1ee02e1f..b10f10c8 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -59,7 +59,7 @@ module Defaults end export FullSVD, IterSVD, OldSVD -export CTMRG, CTMRGEnv +export ProjectorAlg, CTMRG, CTMRGEnv export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor export expectation_value, costfun export leading_boundary diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 5b4d65dc..46ed8186 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -1,25 +1,46 @@ +# TODO: add option for different projector styles (half-infinite, full-infinite, etc.) +""" + struct ProjectorAlg{S}(; svd_alg = TensorKit.SVD(), trscheme = TensorKit.notrunc(), + fixedspace = false, verbosity = 0) + +Algorithm struct collecting all projector related parameters. The truncation scheme has to be +a `TensorKit.TruncationScheme`, and some SVD algorithms might have further restrictions on what +kind of truncation scheme can be used. If `fixedspace` is true, the truncation scheme is set to +`truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding +environment direction/unit cell entry. +""" +@kwdef struct ProjectorAlg{S} + svd_alg::S = TensorKit.SVD() + trscheme::TruncationScheme = TensorKit.notrunc() + fixedspace::Bool = false + verbosity::Int = 0 +end + +function fix_spaces(alg::ProjectorAlg) + return ProjectorAlg(; + svd_alg=alg.svd_alg, trscheme=alg.trscheme, fixedspace=true, verbosity=alg.verbosity + ) +end + # TODO: add abstract Algorithm type? """ - struct CTMRG(; trscheme = TensorKit.notrunc(), tol = Defaults.ctmrg_tol, - maxiter = Defaults.ctmrg_maxiter, miniter = Defaults.ctmrg_miniter, - verbosity = 0, fixedspace = false) + struct CTMRG(; tol = Defaults.ctmrg_tol, maxiter = Defaults.ctmrg_maxiter, + miniter = Defaults.ctmrg_miniter, verbosity = 0, + projector_alg = ProjectorAlg()) Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS. -The projector bond dimensions are set via `trscheme` which controls the truncation -properties inside of `TensorKit.tsvd`. Each CTMRG run is converged up to `tol` -where the singular value convergence of the corners as well as the norm is checked. -The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`. -Different levels of output information are printed depending on `verbosity` (0, 1 or 2). -Regardless of the truncation scheme, the space can be kept fixed with `fixedspace`. +Each CTMRG run is converged up to `tol` where the singular value convergence of the +corners as well as the norm is checked. The maximal and minimal number of CTMRG iterations +is set with `maxiter` and `miniter`. Different levels of output information are printed +depending on `verbosity` (0, 1 or 2). All projector related properties are set using the +`ProjectorAlg` struct. """ -@kwdef struct CTMRG{S} +@kwdef struct CTMRG tol::Float64 = Defaults.ctmrg_tol maxiter::Int = Defaults.ctmrg_maxiter miniter::Int = Defaults.ctmrg_miniter verbosity::Int = 0 - svdalg::S = TensorKit.SVD() - trscheme::TruncationScheme = TensorKit.notrunc() - fixedspace::Bool = false + projector_alg::ProjectorAlg = ProjectorAlg() end """ @@ -89,7 +110,11 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG) # Do one final iteration that does not change the spaces alg_fixed = CTMRG(; - alg.trscheme, alg.tol, alg.maxiter, alg.miniter, alg.verbosity, fixedspace=true + tol=alg.tol, + maxiter=alg.maxiter, + miniter=alg.miniter, + verbosity=alg.verbosity, + projector_alg=fix_spaces(alg.projector_alg), ) env′, = ctmrg_iter(state, env, alg_fixed) envfix = gauge_fix(env, env′) @@ -285,7 +310,7 @@ function ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} ϵ = 0.0 for _ in 1:4 - env, _, _, ϵ₀ = left_move(state, env, alg) + env, _, _, ϵ₀ = left_move(state, env, alg.projector_alg) state = rotate_north(state, EAST) env = rotate_north(env, EAST) ϵ = max(ϵ, ϵ₀) @@ -300,7 +325,7 @@ end Grow, project and renormalize the environment `env` in west direction. Return the updated environment as well as the projectors and truncation error. """ -function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} +function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} corners::typeof(env.corners) = copy(env.corners) edges::typeof(env.edges) = copy(env.edges) ϵ = 0.0 @@ -336,7 +361,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} alg.trscheme end @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6] - U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svdalg; trunc=trscheme) + U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svd_alg; trunc=trscheme) ϵ = max(ϵ, ϵ_local / norm(S)) # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 1dae6534..23473bef 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -8,7 +8,14 @@ using TensorKit: TruncationSpace CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) -# SVD wrapper for TensorKit.tsvd that dispatches on the alg type +""" + PEPSKit.tsvd(t::AbstractTensorMap, alg; trunc=notrunc(), p=2) + +Wrapper around `TensorKit.tsvd` which dispatches on the `alg` argument. +This is needed since a custom adjoint for `PEPSKit.tsvd` may be defined, +depending on the algorithm. E.g., for `IterSVD` the adjoint for a truncated +SVD from `KrylovKit.svdsolve` is used. +""" function PEPSKit.tsvd( t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2 ) @@ -115,7 +122,10 @@ function ChainRulesCore.rrule( alg.alg, alg.alg_rrule, ) - copyto!(b, CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys)) + copyto!( + b, + CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys), + ) end return NoTangent(), Δt, NoTangent() end diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl index 260f40b3..f5d10216 100644 --- a/test/boundarymps/vumps.jl +++ b/test/boundarymps/vumps.jl @@ -17,7 +17,9 @@ Random.seed!(29384293742893) N = abs(sum(expectation_value(mps, T))) ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true) + CTMRGEnv(psi; Venv=ComplexSpace(20)), + psi, + CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), ) N´ = abs(norm(psi, ctm)) @@ -33,7 +35,9 @@ end N = abs(prod(expectation_value(mps, T))) ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true) + CTMRGEnv(psi; Venv=ComplexSpace(20)), + psi, + CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), ) N´ = abs(norm(psi, ctm)) diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl index 45bee5d5..b5a7d8e6 100644 --- a/test/ctmrg/gaugefix.jl +++ b/test/ctmrg/gaugefix.jl @@ -3,7 +3,7 @@ using Random using PEPSKit using TensorKit -using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence +using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence, fix_space scalartypes = [Float64, ComplexF64] unitcells = [(1, 1), (2, 2), (3, 2)] @@ -45,10 +45,9 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - alg = CTMRG(; - trscheme=truncdim(dim(ctm_space)), tol=1e-10, miniter=4, maxiter=400, verbosity - ) - alg_fixed = CTMRG(; trscheme=truncdim(dim(ctm_space)), verbosity, fixedspace=true) + projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space))) + alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg) + alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg)) ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) @@ -70,10 +69,9 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - alg = CTMRG(; - trscheme=truncspace(ctm_space), tol=1e-10, miniter=4, maxiter=400, verbosity - ) - alg_fixed = CTMRG(; trscheme=truncspace(ctm_space), verbosity, fixedspace=true) + projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space))) + alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg) + alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg)) ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) diff --git a/test/ctmrg/gradients.jl b/test/ctmrg/gradients.jl index 2c9b86f9..7ae40c3e 100644 --- a/test/ctmrg/gradients.jl +++ b/test/ctmrg/gradients.jl @@ -17,9 +17,8 @@ models = [square_lattice_heisenberg(), square_lattice_pwave()] names = ["Heisenberg", "p-wave superconductor"] Random.seed!(42039482030) tol = 1e-8 -boundary_alg = CTMRG(; - trscheme=truncdim(χenv), tol=tol, miniter=4, maxiter=100, fixedspace=true, verbosity=0 -) +projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg) gradmodes = [ nothing, GeomSum(; tol), ManualIter(; tol), KrylovKit.GMRES(; tol=tol, maxiter=10) ] diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl index c21a2420..380bfb9e 100644 --- a/test/ctmrg/gradparts.jl +++ b/test/ctmrg/gradparts.jl @@ -30,9 +30,8 @@ Vspaces = [ComplexSpace(χbond), Vect[FermionParity](0 => χbond / 2, 1 => χbon Espaces = [ComplexSpace(χenv), Vect[FermionParity](0 => χenv / 2, 1 => χenv / 2)] functions = [left_move, ctmrg_iter, leading_boundary] tol = 1e-8 -boundary_alg = CTMRG(; - trscheme=truncdim(χenv), tol=tol, miniter=4, maxiter=100, fixedspace=true, verbosity=0 -) +projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg) ## Gauge invariant function of the environment # -------------------------------------------- diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index c725a6b1..bae40eb8 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -26,7 +26,7 @@ R = TensorMap(randn, space(r)) l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r) l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r) l_itersvd, g_itersvd = withgradient( - A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, truncspace(ℂ^min(m, n))), r + A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R), r ) @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd diff --git a/test/heisenberg.jl b/test/heisenberg.jl index 35e12ef7..2f569b9d 100644 --- a/test/heisenberg.jl +++ b/test/heisenberg.jl @@ -9,7 +9,8 @@ using OptimKit H = square_lattice_heisenberg() χbond = 2 χenv = 16 -ctm_alg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1) +projector_alg = ProjectorAlg(; trscheme=truncdim(χenv)) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-3, verbosity=2), diff --git a/test/pwave.jl b/test/pwave.jl index 4ebb9c85..afde9514 100644 --- a/test/pwave.jl +++ b/test/pwave.jl @@ -9,9 +9,8 @@ using OptimKit H = square_lattice_pwave() χbond = 2 χenv = 16 -ctm_alg = CTMRG(; - trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=50, fixedspace=true, verbosity=1 -) +projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1, projector_alg) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2), From bbd132d79f44a692af719d1bf649c2dadc49df1f Mon Sep 17 00:00:00 2001 From: Lukas <37111893+lkdvos@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:04:08 +0200 Subject: [PATCH 13/27] Update MPSKit compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 95074716..e913be9f 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ ChainRulesCore = "1.0" Compat = "3.46, 4.2" KrylovKit = "0.8" LinearAlgebra = "1" -MPSKit = "0.10" +MPSKit = "0.11" OptimKit = "0.3" Printf = "1" Statistics = "1" From 0c03cd43ad924468cac7024159b0452fb68a2e59 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 8 Jul 2024 12:12:08 +0200 Subject: [PATCH 14/27] Replace tsvd with tsvd!, add views to IterSVD adjoint --- Project.toml | 2 +- src/algorithms/ctmrg.jl | 11 ++++++++++- src/utility/svd.jl | 13 +++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index e913be9f..ef730624 100644 --- a/Project.toml +++ b/Project.toml @@ -31,7 +31,7 @@ Statistics = "1" TensorKit = "0.12.5" VectorInterface = "0.4" Zygote = "0.6" -julia = "1.6" +julia = "1.8" [extras] SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 46ed8186..97b8bd26 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -1,3 +1,12 @@ +""" + FixedSpaceTruncation <: TensorKit.TruncationScheme + +CTMRG specific truncation scheme for `tsvd` which keeps the bond space on which the SVD +is performed fixed. Since different environment directions and unit cell entries might +have different spaces, this truncation style is different from `TruncationSpace`. +""" +struct FixedSpaceTruncation <: TensorKit.TruncationScheme end + # TODO: add option for different projector styles (half-infinite, full-infinite, etc.) """ struct ProjectorAlg{S}(; svd_alg = TensorKit.SVD(), trscheme = TensorKit.notrunc(), @@ -361,7 +370,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} alg.trscheme end @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6] - U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svd_alg; trunc=trscheme) + U, S, V, ϵ_local = PEPSKit.tsvd!(QQ, alg.svd_alg; trunc=trscheme) ϵ = max(ϵ, ϵ_local / norm(S)) # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 23473bef..6055cde4 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -16,10 +16,11 @@ This is needed since a custom adjoint for `PEPSKit.tsvd` may be defined, depending on the algorithm. E.g., for `IterSVD` the adjoint for a truncated SVD from `KrylovKit.svdsolve` is used. """ -function PEPSKit.tsvd( +PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...) +function PEPSKit.tsvd!( t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2 ) - return TensorKit.tsvd(t; alg, trunc, p) + return TensorKit.tsvd!(t; alg, trunc, p) end # Wrapper around Krylov Kit's GKL iterative SVD solver @@ -64,9 +65,9 @@ function TensorKit._compute_svddata!( Udata[c] = U Vdata[c] = V else # Slice in case more values were converged than requested - Udata[c] = stack(lvecs[1:howmany]) - Vdata[c] = stack(rvecs[1:howmany])' - S = S[1:howmany] + Udata[c] = stack(view(lvecs, 1:howmany)) + Vdata[c] = stack(view(rvecs, 1:howmany))' + S = @view S[1:howmany] end if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction Sdata[c] = S @@ -80,7 +81,7 @@ end # IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block function ChainRulesCore.rrule( - ::typeof(PEPSKit.tsvd), + ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, alg::IterSVD; trunc::TruncationScheme=notrunc(), From 14f0065b4f61ca77a4c21d0325ef0e005f5193c2 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 8 Jul 2024 15:12:07 +0200 Subject: [PATCH 15/27] Improve IterSVD allocation, implement CTMRG convenience constructor, update tests, implement FixedSpaceTruncation --- examples/boundary_mps.jl | 12 ++------- examples/heisenberg.jl | 3 +-- src/PEPSKit.jl | 2 +- src/algorithms/ctmrg.jl | 57 +++++++++++++++++++-------------------- src/utility/svd.jl | 2 +- test/boundarymps/vumps.jl | 12 ++------- test/ctmrg/gaugefix.jl | 12 ++++----- test/ctmrg/gradients.jl | 3 +-- test/ctmrg/gradparts.jl | 3 +-- test/heisenberg.jl | 3 +-- test/pwave.jl | 3 +-- 11 files changed, 44 insertions(+), 68 deletions(-) diff --git a/examples/boundary_mps.jl b/examples/boundary_mps.jl index 558b0de2..8893f63e 100644 --- a/examples/boundary_mps.jl +++ b/examples/boundary_mps.jl @@ -32,11 +32,7 @@ mps, envs, ϵ = leading_boundary(mps, T, VUMPS()) N = abs(prod(expectation_value(mps, T))) # This can be compared to the result obtained using the CTMRG algorithm -ctm = leading_boundary( - peps, - CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), - CTMRGEnv(peps; Venv=ComplexSpace(20)), -) +ctm = leading_boundary(peps, CTMRG(; verbosity=1), CTMRGEnv(peps; Venv=ComplexSpace(20))) N´ = abs(norm(peps, ctm)) @show abs(N - N´) / N @@ -57,11 +53,7 @@ mps2 = PEPSKit.initializeMPS(T2, fill(ComplexSpace(20), 2, 2)) mps2, envs2, ϵ = leading_boundary(mps2, T2, VUMPS()) N2 = abs(prod(expectation_value(mps2, T2))) -ctm2 = leading_boundary( - peps2, - CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), - CTMRGEnv(peps2; Venv=ComplexSpace(20)), -) +ctm2 = leading_boundary(peps2, CTMRG(; verbosity=1), CTMRGEnv(peps2; Venv=ComplexSpace(20))) N2´ = abs(norm(peps2, ctm2)) @show abs(N2 - N2´) / N2 diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl index 65cf65e5..a43f313c 100644 --- a/examples/heisenberg.jl +++ b/examples/heisenberg.jl @@ -11,8 +11,7 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # Parameters χbond = 2 χenv = 20 -projector_alg = ProjectorAlg(; trscheme=truncdim(χenv)) -ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index b10f10c8..5d341885 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -59,7 +59,7 @@ module Defaults end export FullSVD, IterSVD, OldSVD -export ProjectorAlg, CTMRG, CTMRGEnv +export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor export expectation_value, costfun export leading_boundary diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 97b8bd26..1b424790 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -5,7 +5,7 @@ CTMRG specific truncation scheme for `tsvd` which keeps the bond space on which is performed fixed. Since different environment directions and unit cell entries might have different spaces, this truncation style is different from `TruncationSpace`. """ -struct FixedSpaceTruncation <: TensorKit.TruncationScheme end +struct FixedSpaceTruncation <: TensorKit.TruncationScheme end # TODO: add option for different projector styles (half-infinite, full-infinite, etc.) """ @@ -18,38 +18,43 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding environment direction/unit cell entry. """ -@kwdef struct ProjectorAlg{S} +@kwdef struct ProjectorAlg{S,T} svd_alg::S = TensorKit.SVD() - trscheme::TruncationScheme = TensorKit.notrunc() - fixedspace::Bool = false + trscheme::T = FixedSpaceTruncation() verbosity::Int = 0 end -function fix_spaces(alg::ProjectorAlg) - return ProjectorAlg(; - svd_alg=alg.svd_alg, trscheme=alg.trscheme, fixedspace=true, verbosity=alg.verbosity - ) -end - # TODO: add abstract Algorithm type? """ - struct CTMRG(; tol = Defaults.ctmrg_tol, maxiter = Defaults.ctmrg_maxiter, - miniter = Defaults.ctmrg_miniter, verbosity = 0, - projector_alg = ProjectorAlg()) + CTMRG(; tol=Defaults.ctmrg_tol, maxiter=Defaults.ctmrg_maxiter, + miniter=Defaults.ctmrg_miniter, verbosity=0, + svd_alg=TensorKit.SVD(), trscheme=FixedSpaceTruncation()) Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS. Each CTMRG run is converged up to `tol` where the singular value convergence of the corners as well as the norm is checked. The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`. Different levels of output information are printed -depending on `verbosity` (0, 1 or 2). All projector related properties are set using the -`ProjectorAlg` struct. +depending on `verbosity` (0, 1 or 2). The projectors are computed from `svd_alg` SVDs +where the truncation scheme is set via `trscheme`. """ -@kwdef struct CTMRG - tol::Float64 = Defaults.ctmrg_tol - maxiter::Int = Defaults.ctmrg_maxiter - miniter::Int = Defaults.ctmrg_miniter - verbosity::Int = 0 - projector_alg::ProjectorAlg = ProjectorAlg() +struct CTMRG + tol::Float64 + maxiter::Int + miniter::Int + verbosity::Int + projector_alg::ProjectorAlg +end +function CTMRG(; + tol=Defaults.ctmrg_tol, + maxiter=Defaults.ctmrg_maxiter, + miniter=Defaults.ctmrg_miniter, + verbosity=0, + svd_alg=TensorKit.SVD(), + trscheme=FixedSpaceTruncation(), +) + return CTMRG( + tol, maxiter, miniter, verbosity, ProjectorAlg(; svd_alg, trscheme, verbosity) + ) end """ @@ -118,13 +123,7 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG) end # Do one final iteration that does not change the spaces - alg_fixed = CTMRG(; - tol=alg.tol, - maxiter=alg.maxiter, - miniter=alg.miniter, - verbosity=alg.verbosity, - projector_alg=fix_spaces(alg.projector_alg), - ) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() env′, = ctmrg_iter(state, env, alg_fixed) envfix = gauge_fix(env, env′) check_elementwise_convergence(env, envfix; atol=alg.tol^(1 / 2)) || @@ -364,7 +363,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} ) # SVD half-infinite environment - trscheme = if alg.fixedspace == true + trscheme = if alg.trscheme isa FixedSpaceTruncation truncspace(space(env.edges[WEST, row, cnext], 1)) else alg.trscheme diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 6055cde4..1c24b6f5 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -66,7 +66,7 @@ function TensorKit._compute_svddata!( Vdata[c] = V else # Slice in case more values were converged than requested Udata[c] = stack(view(lvecs, 1:howmany)) - Vdata[c] = stack(view(rvecs, 1:howmany))' + Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1) S = @view S[1:howmany] end if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl index f5d10216..f8709028 100644 --- a/test/boundarymps/vumps.jl +++ b/test/boundarymps/vumps.jl @@ -16,11 +16,7 @@ Random.seed!(29384293742893) mps, envs, ϵ = leading_boundary(mps, T, VUMPS()) N = abs(sum(expectation_value(mps, T))) - ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), - psi, - CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), - ) + ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1)) N´ = abs(norm(psi, ctm)) @test N ≈ N´ atol = 1e-3 @@ -34,11 +30,7 @@ end mps, envs, ϵ = leading_boundary(mps, T, VUMPS()) N = abs(prod(expectation_value(mps, T))) - ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), - psi, - CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)), - ) + ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1)) N´ = abs(norm(psi, ctm)) @test N ≈ N´ rtol = 1e-3 diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl index b5a7d8e6..268e579e 100644 --- a/test/ctmrg/gaugefix.jl +++ b/test/ctmrg/gaugefix.jl @@ -3,7 +3,7 @@ using Random using PEPSKit using TensorKit -using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence, fix_space +using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence scalartypes = [Float64, ComplexF64] unitcells = [(1, 1), (2, 2), (3, 2)] @@ -45,9 +45,8 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space))) - alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg) - alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg)) + alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) @@ -69,9 +68,8 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space))) - alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg) - alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg)) + alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) diff --git a/test/ctmrg/gradients.jl b/test/ctmrg/gradients.jl index 7ae40c3e..d7a842d6 100644 --- a/test/ctmrg/gradients.jl +++ b/test/ctmrg/gradients.jl @@ -17,8 +17,7 @@ models = [square_lattice_heisenberg(), square_lattice_pwave()] names = ["Heisenberg", "p-wave superconductor"] Random.seed!(42039482030) tol = 1e-8 -projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) -boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0) gradmodes = [ nothing, GeomSum(; tol), ManualIter(; tol), KrylovKit.GMRES(; tol=tol, maxiter=10) ] diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl index 380bfb9e..e1720db8 100644 --- a/test/ctmrg/gradparts.jl +++ b/test/ctmrg/gradparts.jl @@ -30,8 +30,7 @@ Vspaces = [ComplexSpace(χbond), Vect[FermionParity](0 => χbond / 2, 1 => χbon Espaces = [ComplexSpace(χenv), Vect[FermionParity](0 => χenv / 2, 1 => χenv / 2)] functions = [left_move, ctmrg_iter, leading_boundary] tol = 1e-8 -projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) -boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0) ## Gauge invariant function of the environment # -------------------------------------------- diff --git a/test/heisenberg.jl b/test/heisenberg.jl index 2f569b9d..0891fea2 100644 --- a/test/heisenberg.jl +++ b/test/heisenberg.jl @@ -9,8 +9,7 @@ using OptimKit H = square_lattice_heisenberg() χbond = 2 χenv = 16 -projector_alg = ProjectorAlg(; trscheme=truncdim(χenv)) -ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-3, verbosity=2), diff --git a/test/pwave.jl b/test/pwave.jl index afde9514..a94c1d0d 100644 --- a/test/pwave.jl +++ b/test/pwave.jl @@ -9,8 +9,7 @@ using OptimKit H = square_lattice_pwave() χbond = 2 χenv = 16 -projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true) -ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1, projector_alg) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2), From 9b2c4b7531181fbe980ad22fa47842a44f178525 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 8 Jul 2024 16:20:16 +0200 Subject: [PATCH 16/27] Fix tests --- test/boundarymps/vumps.jl | 2 +- test/ctmrg/gaugefix.jl | 1 + test/ctmrg/gradparts.jl | 6 ++---- test/pwave.jl | 4 ++-- test/runtests.jl | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl index f8709028..fa4b434d 100644 --- a/test/boundarymps/vumps.jl +++ b/test/boundarymps/vumps.jl @@ -33,7 +33,7 @@ end ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1)) N´ = abs(norm(psi, ctm)) - @test N ≈ N´ rtol = 1e-3 + @test N ≈ N´ rtol = 1e-2 end @testset "PEPO runthrough" begin diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl index 268e579e..195df266 100644 --- a/test/ctmrg/gaugefix.jl +++ b/test/ctmrg/gaugefix.jl @@ -2,6 +2,7 @@ using Test using Random using PEPSKit using TensorKit +using Accessors using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl index e1720db8..9c2e3442 100644 --- a/test/ctmrg/gradparts.jl +++ b/test/ctmrg/gradparts.jl @@ -49,10 +49,8 @@ end ## Tests # ------ -@testset "Reverse rules for composite parts of the CTMRG fixed point with spacetype $(Vspaces[i])" for i in - eachindex( - Pspaces -) +title = "Reverse rules for composite parts of the CTMRG fixed point with spacetype" +@testset title * "$(Vspaces[i])" for i in eachindex(Pspaces) psi = InfinitePEPS(Pspaces[i], Vspaces[i], Vspaces[i]) env = CTMRGEnv(psi; Venv=Espaces[i]) diff --git a/test/pwave.jl b/test/pwave.jl index a94c1d0d..e9acf179 100644 --- a/test/pwave.jl +++ b/test/pwave.jl @@ -9,11 +9,11 @@ using OptimKit H = square_lattice_pwave() χbond = 2 χenv = 16 -ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2), - gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50, verbosity=2), + gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50), reuse_env=true, verbosity=2, ) diff --git a/test/runtests.jl b/test/runtests.jl index 33e2fc42..cd7d81d6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,7 +10,7 @@ const GROUP = get(ENV, "GROUP", "All") @time @safetestset "Gradients" begin include("ctmrg/gradients.jl") end - @time @safetestset "Gradients" begin + @time @safetestset "SVD wrapper" begin include("ctmrg/svd_wrapper.jl") end end From 0c13d47a1c357ed6aae81385f686de4751044667 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 8 Jul 2024 17:03:04 +0200 Subject: [PATCH 17/27] Add block-wise dense fallback option --- src/utility/svd.jl | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 1c24b6f5..ded85014 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -26,13 +26,14 @@ end # Wrapper around Krylov Kit's GKL iterative SVD solver @kwdef struct IterSVD alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25) + fallback_threshold::Float64 = Inf lorentz_broad::Float64 = 0.0 alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14) end # Compute SVD data block-wise using KrylovKit algorithm function TensorKit._tsvd!( - t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 + t, alg::Union{IterSVD}, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 ) # early return if isempty(blocksectors(t)) @@ -59,16 +60,24 @@ function TensorKit._compute_svddata!( for (c, b) in blocks(t) x₀ = randn(eltype(b), size(b, 1)) howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c) - S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) - if info.converged < howmany # Fall back to dense SVD if not properly converged + + if howmany / minimum(size(b)) > alg.fallback_threshold # Use dense SVD for small blocks U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) - Udata[c] = U - Vdata[c] = V - else # Slice in case more values were converged than requested - Udata[c] = stack(view(lvecs, 1:howmany)) - Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1) - S = @view S[1:howmany] + Udata[c] = @view U[:, 1:howmany] + Vdata[c] = @view V[1:howmany, :] + else + S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) + if info.converged < howmany # Fall back to dense SVD if not properly converged + U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + Udata[c] = @view U[:, 1:howmany] + Vdata[c] = @view V[1:howmany, :] + else # Slice in case more values were converged than requested + Udata[c] = stack(view(lvecs, 1:howmany)) + Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1) + end end + + S = @view S[1:howmany] if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction Sdata[c] = S else From 538652d2074255eb8d656904a8f255b80b4a1476 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Mon, 8 Jul 2024 18:40:38 +0200 Subject: [PATCH 18/27] Add SVDrrule wrapper, add separate adjoint structs and rrules, update SVD test, add docs --- src/PEPSKit.jl | 2 +- src/algorithms/ctmrg.jl | 6 +- src/utility/svd.jl | 119 +++++++++++++++++++++++++++----------- test/ctmrg/svd_wrapper.jl | 36 ++++++------ 4 files changed, 108 insertions(+), 55 deletions(-) diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 5d341885..a95366b9 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -58,7 +58,7 @@ module Defaults const fpgrad_tol = 1e-6 end -export FullSVD, IterSVD, OldSVD +export SVDrrule, IterSVD, OldSVD, CompleteSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor export expectation_value, costfun diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 1b424790..3cd745df 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -18,8 +18,8 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding environment direction/unit cell entry. """ -@kwdef struct ProjectorAlg{S,T} - svd_alg::S = TensorKit.SVD() +@kwdef struct ProjectorAlg{S<:SVDrrule,T} + svd_alg::S = SVDrrule() trscheme::T = FixedSpaceTruncation() verbosity::Int = 0 end @@ -49,7 +49,7 @@ function CTMRG(; maxiter=Defaults.ctmrg_maxiter, miniter=Defaults.ctmrg_miniter, verbosity=0, - svd_alg=TensorKit.SVD(), + svd_alg=SVDrrule(), trscheme=FixedSpaceTruncation(), ) return CTMRG( diff --git a/src/utility/svd.jl b/src/utility/svd.jl index ded85014..9a2651ff 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -8,6 +8,16 @@ using TensorKit: TruncationSpace CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) +""" + struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = CompleteSVDAdjoint()) + +Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`. +""" +@kwdef struct SVDrrule{S,R} + svd_alg::S = TensorKit.SVD() + rrule_alg::R = CompleteSVDAdjoint() # TODO: should contain Lorentzian broadening eventually +end # Keep truncation algorithm separate to be able to specify CTMRG dependent information + """ PEPSKit.tsvd(t::AbstractTensorMap, alg; trunc=notrunc(), p=2) @@ -18,22 +28,28 @@ SVD from `KrylovKit.svdsolve` is used. """ PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...) function PEPSKit.tsvd!( - t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2 + t::AbstractTensorMap, alg::SVDrrule; trunc::TruncationScheme=notrunc(), p::Real=2 ) - return TensorKit.tsvd!(t; alg, trunc, p) + return TensorKit.tsvd!(t; alg=alg.svd_alg, trunc, p) end -# Wrapper around Krylov Kit's GKL iterative SVD solver +""" + struct IterSVD(; alg = KrylovKit.GKL(), fallback_threshold = Inf) + +Iterative SVD solver based on KrylovKit's GKL algorithm, adapted to (symmmetric) tensors. +The number of targeted singular values is set via the `TruncationSpace` in `ProjectorAlg`. +In particular, this make it possible to specify the targeted singular values block-wise. +In case the symmetry block is too small as compared to the number of singular values, or +the iterative SVD didn't converge, the algorithm falls back to a dense SVD. +""" @kwdef struct IterSVD alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25) fallback_threshold::Float64 = Inf - lorentz_broad::Float64 = 0.0 - alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14) end # Compute SVD data block-wise using KrylovKit algorithm function TensorKit._tsvd!( - t, alg::Union{IterSVD}, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 + t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 ) # early return if isempty(blocksectors(t)) @@ -63,14 +79,14 @@ function TensorKit._compute_svddata!( if howmany / minimum(size(b)) > alg.fallback_threshold # Use dense SVD for small blocks U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) - Udata[c] = @view U[:, 1:howmany] - Vdata[c] = @view V[1:howmany, :] + Udata[c] = U[:, 1:howmany] + Vdata[c] = V[1:howmany, :] else S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) if info.converged < howmany # Fall back to dense SVD if not properly converged U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) - Udata[c] = @view U[:, 1:howmany] - Vdata[c] = @view V[1:howmany, :] + Udata[c] = U[:, 1:howmany] + Vdata[c] = V[1:howmany, :] else # Slice in case more values were converged than requested Udata[c] = stack(view(lvecs, 1:howmany)) Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1) @@ -88,17 +104,45 @@ function TensorKit._compute_svddata!( return Udata, Sdata, Vdata, dims end -# IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block +""" + struct CompleteSVDAdjoint(; lorentz_broadening = 0.0) + +Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD. +""" +@kwdef struct CompleteSVDAdjoint + lorentz_broadening::Float64 = 0.0 +end + function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, - alg::IterSVD; + alg::SVDrrule{A,CompleteSVDAdjoint}; trunc::TruncationScheme=notrunc(), p::Real=2, -) +) where {A} + return rrule(TensorKit.tsvd!; trunc, p, alg=alg.svd_alg) +end + +""" + struct SparseSVDAdjoint(; lorentz_broadening = 0.0) + +Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required. +""" +@kwdef struct SparseSVDAdjoint + alg::Union{GMRES,BiCGStab,Arnoldi} = GMRES() + lorentz_broadening::Float64 = 0.0 +end + +function ChainRulesCore.rrule( + ::typeof(PEPSKit.tsvd!), + t::AbstractTensorMap, + alg::SVDrrule{A,SparseSVDAdjoint}; + trunc::TruncationScheme=notrunc(), + p::Real=2, +) where {A} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) @@ -129,8 +173,8 @@ function ChainRulesCore.rrule( minimal_info, block(t, c), :LR, - alg.alg, - alg.alg_rrule, + alg.svd_alg.alg, + alg.rrule_alg.alg, ) copyto!( b, @@ -139,50 +183,57 @@ function ChainRulesCore.rrule( end return NoTangent(), Δt, NoTangent() end - function tsvd_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + function tsvd_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd_itersvd_pullback + return (U, S, V, ϵ), tsvd_sparsesvd_pullback end -# Full SVD with old adjoint that doesn't account for truncation properly -@kwdef struct OldSVD{A<:Union{TensorKit.SDD,TensorKit.SVD,IterSVD}} - alg::A = TensorKit.SVD() - lorentz_broad::Float64 = 0.0 -end +""" + struct NonTruncAdjoint(; lorentz_broadening = 0.0) -# Perform TensorKit.SVD in forward pass -function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2) - return _tsvd!(t, TensorKit.SVD(), trunc, p) +Old SVD adjoint that does not account for the truncated part of truncated SVDs. +""" +@kwdef struct NonTruncSVDAdjoint + lorentz_broadening::Float64 = 0.0 end # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd), t::AbstractTensorMap, - alg::OldSVD; + alg::SVDrrule{A,NonTruncSVDAdjoint}; trunc::TruncationScheme=notrunc(), p::Real=2, -) +) where {A} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) copyto!( - b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broad=alg.lorentz_broad) + b, + oldsvd_rev( + Uc, + Sc, + Vc, + ΔUc, + ΔSc, + ΔVc; + lorentz_broadening=alg.rrule_alg.lorentz_broadening, + ), ) end return NoTangent(), Δt, NoTangent() end - function tsvd_oldsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + function tsvd_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd_oldsvd_pullback + return (U, S, V, ϵ), tsvd_nontruncsvd_pullback end function oldsvd_rev( @@ -192,12 +243,12 @@ function oldsvd_rev( ΔU, ΔS, ΔV; - lorentz_broad=0, + lorentz_broadening=0, atol::Real=0, rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), ) tol = atol > 0 ? atol : rtol * S[1, 1] - F = _invert_S²(S, tol, lorentz_broad) # Includes Lorentzian broadening + F = _invert_S²(S, tol, lorentz_broadening) # Includes Lorentzian broadening S⁻¹ = pinv(S; atol=tol) # dS contribution diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index bae40eb8..0f3bd1da 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -16,18 +16,22 @@ m, n = 20, 30 dtype = ComplexF64 χ = 12 trunc = truncspace(ℂ^χ) -# lorentz_broad = 1e-12 -adjoint_tol = 1e-14 # Don't make this too small, g_itersvd will be weird +# lorentz_broadening = 1e-12 rtol = 1e-9 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) R = TensorMap(randn, space(r)) +full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint()) +old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint()) +iter_alg = SVDrrule(; # Don't make adjoint tolerance too small, g_itersvd will be weird + svd_alg=IterSVD(), + rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-14)), +) + @testset "Non-truncacted SVD" begin - l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r) - l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r) - l_itersvd, g_itersvd = withgradient( - A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R), r - ) + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R), r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R), r) @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol @@ -35,11 +39,9 @@ R = TensorMap(randn, space(r)) end @testset "Truncated SVD with χ=$χ" begin - l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R, trunc), r) - l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R, trunc), r) - l_itersvd, g_itersvd = withgradient( - A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, trunc), r - ) + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R, trunc), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R, trunc), r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R, trunc), r) @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol @@ -47,16 +49,16 @@ end end # TODO: Add when Lorentzian broadening is implemented -# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin +# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broadening broadening" begin # l_fullsvd, g_fullsvd = withgradient( -# A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r +# A -> lossfun(A, FullSVD(; lorentz_broadening, R; trunc), r # ) -# l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broad), R; trunc), r) +# l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broadening), R; trunc), r) # l_itersvd, g_itersvd = withgradient( -# A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broad), R; trunc), r +# A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broadening), R; trunc), r # ) # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd # @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol # @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol -# end +# end \ No newline at end of file From 7032ed0575df0d998417be7a17b7b1c1a91fb09d Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 10:45:36 +0200 Subject: [PATCH 19/27] Add IterSVD test for symmetric tensor with fallback --- src/utility/svd.jl | 18 ++++++++++-------- test/ctmrg/svd_wrapper.jl | 32 ++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 9a2651ff..26b35b04 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -120,7 +120,9 @@ function ChainRulesCore.rrule( trunc::TruncationScheme=notrunc(), p::Real=2, ) where {A} - return rrule(TensorKit.tsvd!; trunc, p, alg=alg.svd_alg) + fwd, tsvd!_pullback = rrule(TensorKit.tsvd!, t; trunc, p, alg=alg.svd_alg) + tsvd!_completesvd_pullback(Δsvd) = tsvd!_pullback(Δsvd)..., NoTangent() + return fwd, tsvd!_completesvd_pullback end """ @@ -142,7 +144,7 @@ function ChainRulesCore.rrule( ) where {A} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd!_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) @@ -183,11 +185,11 @@ function ChainRulesCore.rrule( end return NoTangent(), Δt, NoTangent() end - function tsvd_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + function tsvd!_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd_sparsesvd_pullback + return (U, S, V, ϵ), tsvd!_sparsesvd_pullback end """ @@ -201,7 +203,7 @@ end # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( - ::typeof(PEPSKit.tsvd), + ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, alg::SVDrrule{A,NonTruncSVDAdjoint}; trunc::TruncationScheme=notrunc(), @@ -209,7 +211,7 @@ function ChainRulesCore.rrule( ) where {A} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd!_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) @@ -229,11 +231,11 @@ function ChainRulesCore.rrule( end return NoTangent(), Δt, NoTangent() end - function tsvd_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + function tsvd!_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd_nontruncsvd_pullback + return (U, S, V, ϵ), tsvd!_nontruncsvd_pullback end function oldsvd_rev( diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index 0f3bd1da..1a65da04 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -4,6 +4,7 @@ using LinearAlgebra using TensorKit using KrylovKit using ChainRulesCore, Zygote +using Accessors using PEPSKit # Gauge-invariant loss function @@ -18,14 +19,14 @@ dtype = ComplexF64 trunc = truncspace(ℂ^χ) # lorentz_broadening = 1e-12 rtol = 1e-9 -r = TensorMap(randn, dtype, ℂ^m ← ℂ^n) +r = TensorMap(randn, dtype, ℂ^m, ℂ^n) R = TensorMap(randn, space(r)) full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint()) old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint()) iter_alg = SVDrrule(; # Don't make adjoint tolerance too small, g_itersvd will be weird - svd_alg=IterSVD(), - rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-14)), + svd_alg=IterSVD(; alg=GKL(; krylovdim=50)), + rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-13)), ) @testset "Non-truncacted SVD" begin @@ -61,4 +62,27 @@ end # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd # @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol # @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol -# end \ No newline at end of file +# end + +symm_m, symm_n = 18, 24 +symm_space = Z2Space(0 => symm_m, 1 => symm_n) +symm_trspace = truncspace(Z2Space(0 => symm_m ÷ 2, 1 => symm_n ÷ 3)) +symm_r = TensorMap(randn, dtype, symm_space, symm_space) +symm_R = TensorMap(randn, dtype, space(symm_r)) + +@testset "IterSVD of symmetric tensors" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, symm_R), symm_r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, symm_R), symm_r) + @test l_itersvd ≈ l_fullsvd + @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol + + l_fullsvd_tr, g_fullsvd_tr = withgradient(A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r) + l_itersvd_tr, g_itersvd_tr = withgradient(A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r) + @test l_itersvd_tr ≈ l_fullsvd_tr + @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol + + iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other + l_itersvd_fb, g_itersvd_fb = withgradient(A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r) + @test l_itersvd_fb ≈ l_fullsvd_tr + @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol +end From d09561f9262871e9b85a6c2d589993aac9f4ca89 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 15:43:53 +0200 Subject: [PATCH 20/27] Formatting --- test/ctmrg/gaugefix.jl | 8 ++++++-- test/ctmrg/svd_wrapper.jl | 12 +++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl index 195df266..a6883474 100644 --- a/test/ctmrg/gaugefix.jl +++ b/test/ctmrg/gaugefix.jl @@ -46,7 +46,9 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))) + alg = CTMRG(; + tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)) + ) alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) @@ -69,7 +71,9 @@ end ctm = CTMRGEnv(psi; Venv=ctm_space) verbosity = 1 - alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))) + alg = CTMRG(; + tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)) + ) alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index 1a65da04..b21c2af7 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -76,13 +76,19 @@ symm_R = TensorMap(randn, dtype, space(symm_r)) @test l_itersvd ≈ l_fullsvd @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol - l_fullsvd_tr, g_fullsvd_tr = withgradient(A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r) - l_itersvd_tr, g_itersvd_tr = withgradient(A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r) + l_fullsvd_tr, g_fullsvd_tr = withgradient( + A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r + ) + l_itersvd_tr, g_itersvd_tr = withgradient( + A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r + ) @test l_itersvd_tr ≈ l_fullsvd_tr @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other - l_itersvd_fb, g_itersvd_fb = withgradient(A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r) + l_itersvd_fb, g_itersvd_fb = withgradient( + A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r + ) @test l_itersvd_fb ≈ l_fullsvd_tr @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol end From b3a0726491d46702808b74ae9f12e861d34e5a85 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 16:04:46 +0200 Subject: [PATCH 21/27] Fix missing cnext in ctmrg, update README example --- README.md | 13 +++---------- docs/src/index.md | 13 +++---------- src/algorithms/ctmrg.jl | 1 + 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index a45818d6..e069d7a2 100644 --- a/README.md +++ b/README.md @@ -36,19 +36,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c using TensorKit, PEPSKit, KrylovKit, OptimKit # constructing the Hamiltonian: -Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell -physical_space = ComplexSpace(2) -T = ComplexF64 -σx = TensorMap(T[0 1; 1 0], physical_space, physical_space) -σy = TensorMap(T[0 im; -im 0], physical_space, physical_space) -σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space) -H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz) -Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4) +H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell # configuring the parameters D = 2 chi = 20 -ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), @@ -60,7 +53,7 @@ opt_alg = PEPSOptimize(; # ground state search state = InfinitePEPS(2, D) ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg) -result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm) +result = fixedpoint(state, H, opt_alg, ctm) @show result.E # -0.6625... ``` diff --git a/docs/src/index.md b/docs/src/index.md index 305f6d84..ea76186c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -21,19 +21,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c using TensorKit, PEPSKit, KrylovKit, OptimKit # constructing the Hamiltonian: -Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell -physical_space = ComplexSpace(2) -T = ComplexF64 -σx = TensorMap(T[0 1; 1 0], physical_space, physical_space) -σy = TensorMap(T[0 im; -im 0], physical_space, physical_space) -σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space) -H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz) -Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4) +H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell # configuring the parameters D = 2 chi = 20 -ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), @@ -45,7 +38,7 @@ opt_alg = PEPSOptimize(; # ground state search state = InfinitePEPS(2, D) ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg) -result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm) +result = fixedpoint(state, H, opt_alg, ctm) @show result.E # -0.6625... ``` diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index d0073465..69d4a465 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -346,6 +346,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} for col in 1:size(state, 2) cprev = _prev(col, size(state, 2)) + cnext = _next(col, size(state, 2)) # Compute projectors for row in 1:size(state, 1) From 89ae0a4e48909a69b3508b1271f8acf784e86149 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 16:35:36 +0200 Subject: [PATCH 22/27] Rename DenseSVDAdjoint, update svd_wrapper test --- src/PEPSKit.jl | 2 +- src/utility/svd.jl | 16 ++++++++-------- test/ctmrg/svd_wrapper.jl | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 650195c5..5fbbf739 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -59,7 +59,7 @@ module Defaults const fpgrad_tol = 1e-6 end -export SVDrrule, IterSVD, OldSVD, CompleteSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint +export SVDrrule, IterSVD, OldSVD, DenseSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv export LocalOperator export expectation_value, costfun diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 26b35b04..b86df78d 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -9,13 +9,13 @@ using TensorKit: CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) """ - struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = CompleteSVDAdjoint()) + struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint()) Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`. """ @kwdef struct SVDrrule{S,R} svd_alg::S = TensorKit.SVD() - rrule_alg::R = CompleteSVDAdjoint() # TODO: should contain Lorentzian broadening eventually + rrule_alg::R = DenseSVDAdjoint() # TODO: should contain Lorentzian broadening eventually end # Keep truncation algorithm separate to be able to specify CTMRG dependent information """ @@ -105,18 +105,18 @@ function TensorKit._compute_svddata!( end """ - struct CompleteSVDAdjoint(; lorentz_broadening = 0.0) + struct DenseSVDAdjoint(; lorentz_broadening = 0.0) Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD. """ -@kwdef struct CompleteSVDAdjoint +@kwdef struct DenseSVDAdjoint lorentz_broadening::Float64 = 0.0 end function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, - alg::SVDrrule{A,CompleteSVDAdjoint}; + alg::SVDrrule{A,DenseSVDAdjoint}; trunc::TruncationScheme=notrunc(), p::Real=2, ) where {A} @@ -130,15 +130,15 @@ end Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required. """ -@kwdef struct SparseSVDAdjoint - alg::Union{GMRES,BiCGStab,Arnoldi} = GMRES() +@kwdef struct SparseSVDAdjoint{A} + alg::A = GMRES() lorentz_broadening::Float64 = 0.0 end function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, - alg::SVDrrule{A,SparseSVDAdjoint}; + alg::SVDrrule{A,<:SparseSVDAdjoint}; trunc::TruncationScheme=notrunc(), p::Real=2, ) where {A} diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index b21c2af7..7a69afea 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -22,7 +22,7 @@ rtol = 1e-9 r = TensorMap(randn, dtype, ℂ^m, ℂ^n) R = TensorMap(randn, space(r)) -full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint()) +full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=DenseSVDAdjoint()) old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint()) iter_alg = SVDrrule(; # Don't make adjoint tolerance too small, g_itersvd will be weird svd_alg=IterSVD(; alg=GKL(; krylovdim=50)), @@ -35,8 +35,8 @@ iter_alg = SVDrrule(; # Don't make adjoint tolerance too small, g_itersvd will l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R), r) @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd - @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol - @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol + @test g_fullsvd[1] ≈ g_oldsvd[1] rtol = rtol + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol end @testset "Truncated SVD with χ=$χ" begin @@ -45,8 +45,8 @@ end l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R, trunc), r) @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd - @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol - @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol + @test !isapprox(g_fullsvd[1], g_oldsvd[1]; rtol) + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol end # TODO: Add when Lorentzian broadening is implemented @@ -74,7 +74,7 @@ symm_R = TensorMap(randn, dtype, space(symm_r)) l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, symm_R), symm_r) l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, symm_R), symm_r) @test l_itersvd ≈ l_fullsvd - @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol l_fullsvd_tr, g_fullsvd_tr = withgradient( A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r @@ -83,12 +83,12 @@ symm_R = TensorMap(randn, dtype, space(symm_r)) A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r ) @test l_itersvd_tr ≈ l_fullsvd_tr - @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol + @test g_fullsvd_tr[1] ≈ g_itersvd_tr[1] rtol = rtol iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other l_itersvd_fb, g_itersvd_fb = withgradient( A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r ) @test l_itersvd_fb ≈ l_fullsvd_tr - @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol + @test g_fullsvd_tr[1] ≈ g_itersvd_fb[1] rtol = rtol end From 25d198cdc0c296046f11d008f03c3e950be5e572 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 16:50:01 +0200 Subject: [PATCH 23/27] Make CRCExt extension backwards compatible with v1.8 --- src/utility/svd.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index b86df78d..8305ac9d 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -6,7 +6,13 @@ using TensorKit: _create_svdtensors, NoTruncation, TruncationSpace -CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) + +# Enables backwards compatibility without package extensions +CRCExt = @static if isdefined(Base, :get_extension) + Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) +else + KrylovKit.KrylovKitChainRulesCoreExt +end """ struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint()) From 6b818e7008f5f7571412ef3308b02c78e25af7ac Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Tue, 9 Jul 2024 17:55:28 +0200 Subject: [PATCH 24/27] Replace SVDrrule with SVDAdjoint, clean up adjoint algorithms --- src/PEPSKit.jl | 2 +- src/algorithms/ctmrg.jl | 6 +-- src/utility/svd.jl | 83 +++++++++++---------------------------- test/ctmrg/svd_wrapper.jl | 11 +++--- 4 files changed, 32 insertions(+), 70 deletions(-) diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 5fbbf739..8d8815ec 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -59,7 +59,7 @@ module Defaults const fpgrad_tol = 1e-6 end -export SVDrrule, IterSVD, OldSVD, DenseSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint +export SVDAdjoint, IterSVD, NonTruncSVDAdjoint export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv export LocalOperator export expectation_value, costfun diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 69d4a465..69bef21a 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -18,8 +18,8 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding environment direction/unit cell entry. """ -@kwdef struct ProjectorAlg{S<:SVDrrule,T} - svd_alg::S = SVDrrule() +@kwdef struct ProjectorAlg{S<:SVDAdjoint,T} + svd_alg::S = SVDAdjoint() trscheme::T = FixedSpaceTruncation() verbosity::Int = 0 end @@ -49,7 +49,7 @@ function CTMRG(; maxiter=Defaults.ctmrg_maxiter, miniter=Defaults.ctmrg_miniter, verbosity=0, - svd_alg=SVDrrule(), + svd_alg=SVDAdjoint(), trscheme=FixedSpaceTruncation(), ) return CTMRG( diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 8305ac9d..c76ddb9f 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -15,13 +15,18 @@ else end """ - struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint()) + struct SVDAdjoint(; fwd_alg = TensorKit.SVD(), rrule_alg = nothing, + broadening = nothing) -Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`. +Wrapper for a SVD algorithm `fwd_alg` with a defined reverse rule `rrule_alg`. +If `isnothing(rrule_alg)`, Zygote differentiates the forward call automatically. +In case of degenerate singular values, one might need a `broadening` scheme which +removes the divergences from the adjoint. """ -@kwdef struct SVDrrule{S,R} - svd_alg::S = TensorKit.SVD() - rrule_alg::R = DenseSVDAdjoint() # TODO: should contain Lorentzian broadening eventually +@kwdef struct SVDAdjoint{F,R,B} + fwd_alg::F = TensorKit.SVD() + rrule_alg::R = nothing + broadening::B = nothing end # Keep truncation algorithm separate to be able to specify CTMRG dependent information """ @@ -34,9 +39,9 @@ SVD from `KrylovKit.svdsolve` is used. """ PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...) function PEPSKit.tsvd!( - t::AbstractTensorMap, alg::SVDrrule; trunc::TruncationScheme=notrunc(), p::Real=2 + t::AbstractTensorMap, alg::SVDAdjoint; trunc::TruncationScheme=notrunc(), p::Real=2 ) - return TensorKit.tsvd!(t; alg=alg.svd_alg, trunc, p) + return TensorKit.tsvd!(t; alg=alg.fwd_alg, trunc, p) end """ @@ -110,47 +115,16 @@ function TensorKit._compute_svddata!( return Udata, Sdata, Vdata, dims end -""" - struct DenseSVDAdjoint(; lorentz_broadening = 0.0) - -Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD. -""" -@kwdef struct DenseSVDAdjoint - lorentz_broadening::Float64 = 0.0 -end - function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, - alg::SVDrrule{A,DenseSVDAdjoint}; + alg::SVDAdjoint{IterSVD,R,B}; trunc::TruncationScheme=notrunc(), p::Real=2, -) where {A} - fwd, tsvd!_pullback = rrule(TensorKit.tsvd!, t; trunc, p, alg=alg.svd_alg) - tsvd!_completesvd_pullback(Δsvd) = tsvd!_pullback(Δsvd)..., NoTangent() - return fwd, tsvd!_completesvd_pullback -end - -""" - struct SparseSVDAdjoint(; lorentz_broadening = 0.0) - -Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required. -""" -@kwdef struct SparseSVDAdjoint{A} - alg::A = GMRES() - lorentz_broadening::Float64 = 0.0 -end - -function ChainRulesCore.rrule( - ::typeof(PEPSKit.tsvd!), - t::AbstractTensorMap, - alg::SVDrrule{A,<:SparseSVDAdjoint}; - trunc::TruncationScheme=notrunc(), - p::Real=2, -) where {A} +) where {R,B} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) - function tsvd!_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) Δt = similar(t) for (c, b) in blocks(Δt) Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) @@ -181,8 +155,8 @@ function ChainRulesCore.rrule( minimal_info, block(t, c), :LR, - alg.svd_alg.alg, - alg.rrule_alg.alg, + alg.fwd_alg.alg, + alg.rrule_alg, ) copyto!( b, @@ -191,11 +165,11 @@ function ChainRulesCore.rrule( end return NoTangent(), Δt, NoTangent() end - function tsvd!_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) return NoTangent(), ZeroTangent(), NoTangent() end - return (U, S, V, ϵ), tsvd!_sparsesvd_pullback + return (U, S, V, ϵ), tsvd!_itersvd_pullback end """ @@ -203,18 +177,16 @@ end Old SVD adjoint that does not account for the truncated part of truncated SVDs. """ -@kwdef struct NonTruncSVDAdjoint - lorentz_broadening::Float64 = 0.0 -end +struct NonTruncSVDAdjoint end # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) function ChainRulesCore.rrule( ::typeof(PEPSKit.tsvd!), t::AbstractTensorMap, - alg::SVDrrule{A,NonTruncSVDAdjoint}; + alg::SVDAdjoint{F,NonTruncSVDAdjoint,B}; trunc::TruncationScheme=notrunc(), p::Real=2, -) where {A} +) where {F,B} U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) function tsvd!_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) @@ -223,16 +195,7 @@ function ChainRulesCore.rrule( Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) copyto!( - b, - oldsvd_rev( - Uc, - Sc, - Vc, - ΔUc, - ΔSc, - ΔVc; - lorentz_broadening=alg.rrule_alg.lorentz_broadening, - ), + b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broadening=alg.broadening) ) end return NoTangent(), Δt, NoTangent() diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl index 7a69afea..3c17dc7d 100644 --- a/test/ctmrg/svd_wrapper.jl +++ b/test/ctmrg/svd_wrapper.jl @@ -22,12 +22,11 @@ rtol = 1e-9 r = TensorMap(randn, dtype, ℂ^m, ℂ^n) R = TensorMap(randn, space(r)) -full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=DenseSVDAdjoint()) -old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint()) -iter_alg = SVDrrule(; # Don't make adjoint tolerance too small, g_itersvd will be weird - svd_alg=IterSVD(; alg=GKL(; krylovdim=50)), - rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-13)), +full_alg = SVDAdjoint(; fwd_alg=TensorKit.SVD(), rrule_alg=nothing) +old_alg = SVDAdjoint(; + fwd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint(), broadening=0.0 ) +iter_alg = SVDAdjoint(; fwd_alg=IterSVD(), rrule_alg=GMRES(; tol=1e-13)) # Don't make adjoint tolerance too small, g_itersvd will be weird @testset "Non-truncacted SVD" begin l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R), r) @@ -85,7 +84,7 @@ symm_R = TensorMap(randn, dtype, space(symm_r)) @test l_itersvd_tr ≈ l_fullsvd_tr @test g_fullsvd_tr[1] ≈ g_itersvd_tr[1] rtol = rtol - iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other + iter_alg_fallback = @set iter_alg.fwd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other l_itersvd_fb, g_itersvd_fb = withgradient( A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r ) From bb96664196850a61346388aef30a1a79e1cdc07b Mon Sep 17 00:00:00 2001 From: lkdvos Date: Tue, 9 Jul 2024 19:02:48 +0200 Subject: [PATCH 25/27] Small cleanup --- src/utility/svd.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/utility/svd.jl b/src/utility/svd.jl index c76ddb9f..685d050d 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -85,7 +85,6 @@ function TensorKit._compute_svddata!( dims = SectorDict{I,Int}() local Sdata for (c, b) in blocks(t) - x₀ = randn(eltype(b), size(b, 1)) howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c) if howmany / minimum(size(b)) > alg.fallback_threshold # Use dense SVD for small blocks @@ -93,8 +92,10 @@ function TensorKit._compute_svddata!( Udata[c] = U[:, 1:howmany] Vdata[c] = V[1:howmany, :] else + x₀ = randn(eltype(b), size(b, 1)) S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) if info.converged < howmany # Fall back to dense SVD if not properly converged + @warn "Iterative SVD did not converge for block $c, falling back to dense SVD" U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) Udata[c] = U[:, 1:howmany] Vdata[c] = V[1:howmany, :] @@ -104,8 +105,8 @@ function TensorKit._compute_svddata!( end end - S = @view S[1:howmany] - if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction + resize!(S, howmany) + if @isdefined Sdata Sdata[c] = S else Sdata = SectorDict(c => S) From fa7a56aa41205f6194fa50983f61d8ad9cf1ead1 Mon Sep 17 00:00:00 2001 From: lkdvos Date: Wed, 10 Jul 2024 10:24:58 +0200 Subject: [PATCH 26/27] Update minimal julia version 1.9 --- .github/workflows/CI.yml | 2 +- Project.toml | 2 +- src/utility/svd.jl | 7 +------ 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aad659d0..0adc6bdc 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: version: - - '1.8' # LTS version + - '1.9' - '1' # automatically expands to the latest stable 1.x release of Julia os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index 8777406c..c4abe2b8 100644 --- a/Project.toml +++ b/Project.toml @@ -34,7 +34,7 @@ TensorKit = "0.12.5" TensorOperations = "4" VectorInterface = "0.4" Zygote = "0.6" -julia = "1.8" +julia = "1.9" [extras] ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" diff --git a/src/utility/svd.jl b/src/utility/svd.jl index 685d050d..740892ba 100644 --- a/src/utility/svd.jl +++ b/src/utility/svd.jl @@ -7,12 +7,7 @@ using TensorKit: NoTruncation, TruncationSpace -# Enables backwards compatibility without package extensions -CRCExt = @static if isdefined(Base, :get_extension) - Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) -else - KrylovKit.KrylovKitChainRulesCoreExt -end +const CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) """ struct SVDAdjoint(; fwd_alg = TensorKit.SVD(), rrule_alg = nothing, From 6df8efc307920df258f384a366ea4ae010b8ae23 Mon Sep 17 00:00:00 2001 From: Paul Brehmer Date: Wed, 10 Jul 2024 08:43:44 +0000 Subject: [PATCH 27/27] Remove duplicate line in left_move --- src/algorithms/ctmrg.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index 69bef21a..b2908b5b 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -373,7 +373,6 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} else alg.trscheme end - @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6] @autoopt @tensor QQ[χ_EB D_EBabove D_EBbelow; χ_ET D_ETabove D_ETbelow] := Q_sw[χ_EB D_EBabove D_EBbelow; χ D1 D2] * Q_nw[χ D1 D2; χ_ET D_ETabove D_ETbelow]