From c390beca2cc54a102e4ce5353ab6857b17107cb1 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 4 Mar 2024 19:06:37 +0100
Subject: [PATCH 01/27] Add truncated SVD adjoint with wrapper for KrylovKit
 iterative SVD, add small test script

---
 Project.toml                 |   1 +
 examples/test_svd_adjoint.jl |  99 ++++++++++++++++++++++++
 src/PEPSKit.jl               |   4 +-
 src/utility/svd.jl           | 143 +++++++++++++++++++++++++++++++++++
 4 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 examples/test_svd_adjoint.jl
 create mode 100644 src/utility/svd.jl

diff --git a/Project.toml b/Project.toml
index 8028adf6..d5e3a930 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,6 +15,7 @@ Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 TensorKit = "07d1fe3e-3e46-537d-9eac-e9e13d0d4cec"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 MPSKit = "0.10"
diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl
new file mode 100644
index 00000000..e11f0eff
--- /dev/null
+++ b/examples/test_svd_adjoint.jl
@@ -0,0 +1,99 @@
+using LinearAlgebra
+using TensorKit
+using ChainRulesCore, Zygote
+using PEPSKit
+
+# Non-proper truncated SVD with outdated adjoint
+oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...)
+
+# Outdated adjoint not taking truncated part into account
+function ChainRulesCore.rrule(
+    ::typeof(oldsvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs...
+)
+    U, S, V = oldsvd(t, χ; kwargs...)
+
+    function oldsvd_pullback((ΔU, ΔS, ΔV))
+        ∂t = similar(t)
+        for (c, b) in blocks(∂t)
+            copyto!(
+                b,
+                oldsvd_rev(
+                    block(U, c),
+                    block(S, c),
+                    block(V, c),
+                    block(ΔU, c),
+                    block(ΔS, c),
+                    block(ΔV, c);
+                    εbroad,
+                ),
+            )
+        end
+        return NoTangent(), ∂t, NoTangent()
+    end
+
+    return (U, S, V), oldsvd_pullback
+end
+
+function oldsvd_rev(
+    U::AbstractMatrix,
+    S::AbstractMatrix,
+    V::AbstractMatrix,
+    ΔU,
+    ΔS,
+    ΔV;
+    εbroad=0,
+    atol::Real=0,
+    rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
+)
+    S = diagm(S)
+    V = copy(V')
+
+    tol = atol > 0 ? atol : rtol * S[1, 1]
+    F = PEPSKit.invert_S²(S, tol; εbroad)  # Includes Lorentzian broadening
+    S⁻¹ = pinv(S; atol=tol)
+
+    # dS contribution
+    term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS))
+
+    # dU₁ and dV₁ off-diagonal contribution
+    J = F .* (U' * ΔU)
+    term += (J + J') * S
+    VΔV = (V * ΔV')
+    K = F .* VΔV
+    term += S * (K + K')
+
+    # dV₁ diagonal contribution (diagonal of dU₁ is gauged away)
+    if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent)
+        L = Diagonal(diag(VΔV))
+        term += 0.5 * S⁻¹ * (L' - L)
+    end
+    ΔA = U * term * V
+
+    # Projector contribution for non-square A
+    UUd = U * U'
+    VdV = V' * V
+    Uproj = one(UUd) - UUd
+    Vproj = one(VdV) - VdV
+    ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj  # Old wrong stuff
+
+    return ΔA
+end
+
+# Loss function taking the nfirst first singular vectors into account
+function nfirst_loss(A, svdfunc; nfirst=1)
+    U, _, V = svdfunc(A)
+    U = convert(Array, U)
+    V = convert(Array, V)
+    return real(sum([U[i, i] * V[i, i] for i in 1:nfirst]))
+end
+
+m, n = 30, 20
+dtype = ComplexF64
+χ = 15
+r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
+
+ltensorkit, gtensorkit = withgradient(A -> nfirst_loss(A, x -> oldsvd(x, χ); nfirst=3), r)
+litersvd, gitersvd = withgradient(A -> nfirst_loss(A, x -> itersvd(x, χ); nfirst=3), r)
+
+@show ltensorkit ≈ litersvd
+@show gtensorkit ≈ gitersvd
\ No newline at end of file
diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index b2dcebbe..456482e0 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -6,12 +6,13 @@ using TensorKit,
     KrylovKit, MPSKit, OptimKit, Base.Threads, Base.Iterators, Parameters, Printf
 using ChainRulesCore
 
-using LinearAlgebra: LinearAlgebra
+using LinearAlgebra
 
 export CTMRG, CTMRG2
 export leading_boundary
 
 include("utility/util.jl")
+include("utility/svd.jl")
 
 include("states/abstractpeps.jl")
 include("states/infinitepeps.jl")
@@ -46,5 +47,6 @@ export InfinitePEPO, InfiniteTransferPEPO
 export initializeMPS, initializePEPS
 export PEPOOptimize, pepo_opt_environments
 export symmetrize, None, Depth, Full
+export itersvd
 
 end # module
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
new file mode 100644
index 00000000..d3b149d0
--- /dev/null
+++ b/src/utility/svd.jl
@@ -0,0 +1,143 @@
+# Computation of F in SVD adjoint, including Lorentzian broadening
+function invert_S²(S::AbstractMatrix{T}, tol::Real; εbroad=0) where {T<:Real}
+    F = similar(S)
+    @inbounds for i in axes(F, 1), j in axes(F, 2)
+        F[i, j] = if i == j
+            zero(T)
+        else
+            sᵢ, sⱼ = S[i, i], S[j, j]
+            Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2
+            εbroad > 0 && (Δs = lorentz_broaden(Δs, εbroad))
+            1 / Δs
+        end
+    end
+    return F
+end
+
+# Lorentzian broadening for SVD adjoint singularities
+function lorentz_broaden(x::Real, ε=1e-12)
+    x′ = 1 / x
+    return x′ / (x′^2 + ε)
+end
+
+# Proper truncated SVD using iterative solver
+function itersvd(
+    t::AbstractTensorMap,
+    χ::Int;
+    εbroad=0,
+    solverkwargs=(; krylovdim=χ + 5, tol=1e2eps(real(scalartype(t)))),
+)
+    vals, lvecs, rvecs, info = svdsolve(t.data, dim(codomain(t)), χ; solverkwargs...)
+    truncspace = field(t)^χ
+    if info.converged < χ  # Fall back to dense SVD
+        @warn "falling back to dense SVD solver since length(S) < χ"
+        return tsvd(t; trunc=truncdim(χ), alg=TensorKit.SVD())
+    else
+        vals = @view(vals[1:χ])
+        lvecs = @view(lvecs[1:χ])
+        rvecs = @view(rvecs[1:χ])
+    end
+    U = TensorMap(hcat(lvecs...), codomain(t) ← truncspace)
+    S = TensorMap(diagm(vals), truncspace ← truncspace)
+    V = TensorMap(copy(hcat(rvecs...)'), truncspace ← domain(t))
+    return U, S, V
+end
+
+# Reverse rule adopted from tsvd! rrule as found in TensorKit.jl
+function ChainRulesCore.rrule(
+    ::typeof(itersvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs...
+)
+    U, S, V = itersvd(t, χ; kwargs...)
+
+    function itersvd_pullback((ΔU, ΔS, ΔV))
+        ∂t = similar(t)
+        for (c, b) in blocks(∂t)
+            copyto!(
+                b,
+                itersvd_rev(
+                    block(t, c),
+                    block(U, c),
+                    block(S, c),
+                    block(V, c),
+                    block(ΔU, c),
+                    block(ΔS, c),
+                    block(ΔV, c);
+                    εbroad,
+                ),
+            )
+        end
+        return NoTangent(), ∂t, NoTangent()
+    end
+
+    return (U, S, V), itersvd_pullback
+end
+
+# SVD adjoint with proper truncation
+function itersvd_rev(
+    A::AbstractMatrix,
+    U::AbstractMatrix,
+    S::AbstractMatrix,
+    V::AbstractMatrix,
+    ΔU,
+    ΔS,
+    ΔV;
+    εbroad=0,
+    atol::Real=0,
+    rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
+)
+    Ad = copy(A')
+    tol = atol > 0 ? atol : rtol * S[1, 1]
+    F = invert_S²(S, tol; εbroad)  # Includes Lorentzian broadening
+    S⁻¹ = pinv(S; atol=tol)
+
+    # dS contribution
+    term = ΔS isa ZeroTangent ? ΔS : Diagonal(real.(ΔS))  # Implicitly performs 𝕀 ∘ dS
+
+    # dU₁ and dV₁ off-diagonal contribution
+    J = F .* (U' * ΔU)
+    term += (J + J') * S
+    VΔV = (V * ΔV')
+    K = F .* VΔV
+    term += S * (K + K')
+
+    # dV₁ diagonal contribution (diagonal of dU₁ is gauged away)
+    if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent)
+        L = Diagonal(VΔV)  # Implicitly performs 𝕀 ∘ dV
+        term += 0.5 * S⁻¹ * (L' - L)
+    end
+    ΔA = U * term * V
+
+    # Projector contribution for non-square A and dU₂ and dV₂
+    UUd = U * U'
+    VdV = V' * V
+    Uproj = one(UUd) - UUd
+    Vproj = one(VdV) - VdV
+    m, k, n = size(U, 1), size(U, 2), size(V, 2)
+    dimγ = k * m  # Vectorized dimension of γ-matrix
+
+    # Truncation contribution from dU₂ and dV₂
+    # TODO: Use KrylovKit instead of IterativeSolvers
+    Sop = LinearMap(k * m + k * n) do v  # Left-preconditioned linear problem
+        γ = reshape(@view(v[1:dimγ]), (k, m))
+        γd = reshape(@view(v[(dimγ + 1):end]), (k, n))
+        Γ1 = γ - S⁻¹ * γd * Vproj * Ad
+        Γ2 = γd - S⁻¹ * γ * Uproj * A
+        vcat(reshape(Γ1, :), reshape(Γ2, :))
+    end
+    if ΔU isa ZeroTangent && ΔV isa ZeroTangent
+        γ = gmres(Sop, zeros(eltype(A), k * m + k * n))
+    else
+        # Explicit left-preconditioning
+        # Set relative tolerance to machine precision to converge SVD gradient error properly
+        γ = gmres(
+            Sop,
+            vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :));
+            reltol=eps(real(eltype(A))),
+        )
+    end
+    γA = reshape(@view(γ[1:dimγ]), k, m)
+    γAd = reshape(@view(γ[(dimγ + 1):end]), k, n)
+    ΔA += Uproj * γA' * V + U * γAd * Vproj
+
+    return ΔA
+end

From 51507ce14eae2fff8cfaa94089e58eb009e28b26 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 5 Mar 2024 11:16:41 +0100
Subject: [PATCH 02/27] Use KrylovKit.linsolve for truncation linear problem,
 make loss function differentiable

---
 Project.toml                 |  1 +
 examples/test_svd_adjoint.jl | 34 +++++++++++++++++++---------------
 src/utility/svd.jl           | 28 ++++++++++++----------------
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/Project.toml b/Project.toml
index d5e3a930..2826518e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ version = "0.1.0"
 [deps]
 Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MPSKit = "bb1c41ca-d63c-52ed-829e-0820dda26502"
diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl
index e11f0eff..fcd50f47 100644
--- a/examples/test_svd_adjoint.jl
+++ b/examples/test_svd_adjoint.jl
@@ -1,6 +1,6 @@
 using LinearAlgebra
 using TensorKit
-using ChainRulesCore, Zygote
+using ChainRulesCore, ChainRulesTestUtils, Zygote
 using PEPSKit
 
 # Non-proper truncated SVD with outdated adjoint
@@ -45,9 +45,6 @@ function oldsvd_rev(
     atol::Real=0,
     rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
 )
-    S = diagm(S)
-    V = copy(V')
-
     tol = atol > 0 ? atol : rtol * S[1, 1]
     F = PEPSKit.invert_S²(S, tol; εbroad)  # Includes Lorentzian broadening
     S⁻¹ = pinv(S; atol=tol)
@@ -74,26 +71,33 @@ function oldsvd_rev(
     VdV = V' * V
     Uproj = one(UUd) - UUd
     Vproj = one(VdV) - VdV
-    ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj  # Old wrong stuff
+    ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj  # Wrong truncation contribution
 
     return ΔA
 end
 
-# Loss function taking the nfirst first singular vectors into account
-function nfirst_loss(A, svdfunc; nfirst=1)
+# Gauge-invariant loss function
+function lossfun(A, svdfunc)
     U, _, V = svdfunc(A)
-    U = convert(Array, U)
-    V = convert(Array, V)
-    return real(sum([U[i, i] * V[i, i] for i in 1:nfirst]))
+    # return real(sum((U * V).data)) # TODO: code up sum for AbstractTensorMap with rrule
+    return real(tr(U * V))  # trace only allows for m=n
 end
 
-m, n = 30, 20
+m, n = 30, 30
 dtype = ComplexF64
-χ = 15
+χ = 20
 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
 
-ltensorkit, gtensorkit = withgradient(A -> nfirst_loss(A, x -> oldsvd(x, χ); nfirst=3), r)
-litersvd, gitersvd = withgradient(A -> nfirst_loss(A, x -> itersvd(x, χ); nfirst=3), r)
+println("Non-truncated SVD")
+ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, min(m, n))), r)
+litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, min(m, n))), r)
+@show ltensorkit ≈ litersvd
+@show norm(gtensorkit[1] - gitersvd[1])
 
+println("\nTruncated SVD to χ=$χ:")
+ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, χ)), r)
+litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, χ)), r)
 @show ltensorkit ≈ litersvd
-@show gtensorkit ≈ gitersvd
\ No newline at end of file
+@show norm(gtensorkit[1] - gitersvd[1])
+
+# TODO: Finite-difference check via test_rrule
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index d3b149d0..ee3cbd4b 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -116,28 +116,24 @@ function itersvd_rev(
     dimγ = k * m  # Vectorized dimension of γ-matrix
 
     # Truncation contribution from dU₂ and dV₂
-    # TODO: Use KrylovKit instead of IterativeSolvers
-    Sop = LinearMap(k * m + k * n) do v  # Left-preconditioned linear problem
-        γ = reshape(@view(v[1:dimγ]), (k, m))
-        γd = reshape(@view(v[(dimγ + 1):end]), (k, n))
-        Γ1 = γ - S⁻¹ * γd * Vproj * Ad
-        Γ2 = γd - S⁻¹ * γ * Uproj * A
-        vcat(reshape(Γ1, :), reshape(Γ2, :))
+    function svdlinprob(v)  # Left-preconditioned linear problem
+        γ1 = reshape(@view(v[1:dimγ]), (k, m))
+        γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n))
+        Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad
+        Γ2 = γ2 - S⁻¹ * γ1 * Uproj * A
+        return vcat(reshape(Γ1, :), reshape(Γ2, :))
     end
     if ΔU isa ZeroTangent && ΔV isa ZeroTangent
-        γ = gmres(Sop, zeros(eltype(A), k * m + k * n))
+        γ = linsolve(Sop, zeros(eltype(A), k * m + k * n))
     else
         # Explicit left-preconditioning
         # Set relative tolerance to machine precision to converge SVD gradient error properly
-        γ = gmres(
-            Sop,
-            vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :));
-            reltol=eps(real(eltype(A))),
-        )
+        y = vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :))
+        γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))
     end
-    γA = reshape(@view(γ[1:dimγ]), k, m)
-    γAd = reshape(@view(γ[(dimγ + 1):end]), k, n)
-    ΔA += Uproj * γA' * V + U * γAd * Vproj
+    γA1 = reshape(@view(γ[1:dimγ]), k, m)
+    γA2 = reshape(@view(γ[(dimγ + 1):end]), k, n)
+    ΔA += Uproj * γA1' * V + U * γA2 * Vproj
 
     return ΔA
 end

From d3a31fb55d1661d8cd0c01778f53fd64714b933d Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 5 Mar 2024 16:09:22 +0100
Subject: [PATCH 03/27] Improve loss function, compare SVD gradient with
 TensorKit.tsvd gradient

---
 examples/test_svd_adjoint.jl | 38 +++++++++++++++++++++---------------
 src/utility/svd.jl           |  1 +
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl
index fcd50f47..69124870 100644
--- a/examples/test_svd_adjoint.jl
+++ b/examples/test_svd_adjoint.jl
@@ -3,7 +3,7 @@ using TensorKit
 using ChainRulesCore, ChainRulesTestUtils, Zygote
 using PEPSKit
 
-# Non-proper truncated SVD with outdated adjoint
+# Truncated SVD with outdated adjoint
 oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...)
 
 # Outdated adjoint not taking truncated part into account
@@ -77,27 +77,33 @@ function oldsvd_rev(
 end
 
 # Gauge-invariant loss function
-function lossfun(A, svdfunc)
+function lossfun(A, R=TensorMap(randn, space(A)), svdfunc=tsvd)
     U, _, V = svdfunc(A)
-    # return real(sum((U * V).data)) # TODO: code up sum for AbstractTensorMap with rrule
-    return real(tr(U * V))  # trace only allows for m=n
+    return real(dot(R, U * V))  # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n
 end
 
-m, n = 30, 30
+m, n = 20, 30
 dtype = ComplexF64
-χ = 20
+χ = 15
 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
+R = TensorMap(randn, space(r))
 
-println("Non-truncated SVD")
-ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, min(m, n))), r)
-litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, min(m, n))), r)
-@show ltensorkit ≈ litersvd
+println("Non-truncated SVD:")
+loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, min(m, n))), r)
+ltensorkit, gtensorkit = withgradient(
+    A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(min(m, n)))), r
+)
+litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, min(m, n))), r)
+@show loldsvd ≈ ltensorkit ≈ litersvd
+@show norm(gtensorkit[1] - goldsvd[1])
 @show norm(gtensorkit[1] - gitersvd[1])
 
-println("\nTruncated SVD to χ=$χ:")
-ltensorkit, gtensorkit = withgradient(A -> lossfun(A, x -> oldsvd(x, χ)), r)
-litersvd, gitersvd = withgradient(A -> lossfun(A, x -> itersvd(x, χ)), r)
-@show ltensorkit ≈ litersvd
+println("\nTruncated SVD with χ=$χ:")
+loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, χ)), r)
+ltensorkit, gtensorkit = withgradient(
+    A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(χ))), r
+)
+litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, χ)), r)
+@show loldsvd ≈ ltensorkit ≈ litersvd
+@show norm(gtensorkit[1] - goldsvd[1])
 @show norm(gtensorkit[1] - gitersvd[1])
-
-# TODO: Finite-difference check via test_rrule
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index ee3cbd4b..975c72cb 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -117,6 +117,7 @@ function itersvd_rev(
 
     # Truncation contribution from dU₂ and dV₂
     function svdlinprob(v)  # Left-preconditioned linear problem
+        # TODO: make v a Tuple instead of concatening two vectors
         γ1 = reshape(@view(v[1:dimγ]), (k, m))
         γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n))
         Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad

From 8ac307e490b6fd7c68793f12670b478e2277e999 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 26 Mar 2024 14:46:50 +0100
Subject: [PATCH 04/27] Update SVD adjoint linear problem to use Tuple and
 remove reshapes

---
 src/utility/svd.jl | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 975c72cb..ccca32d4 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -112,29 +112,21 @@ function itersvd_rev(
     VdV = V' * V
     Uproj = one(UUd) - UUd
     Vproj = one(VdV) - VdV
-    m, k, n = size(U, 1), size(U, 2), size(V, 2)
-    dimγ = k * m  # Vectorized dimension of γ-matrix
 
     # Truncation contribution from dU₂ and dV₂
     function svdlinprob(v)  # Left-preconditioned linear problem
-        # TODO: make v a Tuple instead of concatening two vectors
-        γ1 = reshape(@view(v[1:dimγ]), (k, m))
-        γ2 = reshape(@view(v[(dimγ + 1):end]), (k, n))
-        Γ1 = γ1 - S⁻¹ * γ2 * Vproj * Ad
-        Γ2 = γ2 - S⁻¹ * γ1 * Uproj * A
-        return vcat(reshape(Γ1, :), reshape(Γ2, :))
+        Γ1 = v[1] - S⁻¹ * v[2] * Vproj * Ad
+        Γ2 = v[2] - S⁻¹ * v[1] * Uproj * A
+        return (Γ1, Γ2)
     end
     if ΔU isa ZeroTangent && ΔV isa ZeroTangent
-        γ = linsolve(Sop, zeros(eltype(A), k * m + k * n))
+        m, k, n = size(U, 1), size(U, 2), size(V, 2)
+        γ = linsolve(Sop, (zeros(eltype(A), k * m), zeros(eltype(A), k * n)))
     else
-        # Explicit left-preconditioning
-        # Set relative tolerance to machine precision to converge SVD gradient error properly
-        y = vcat(reshape(S⁻¹ * ΔU' * Uproj, :), reshape(S⁻¹ * ΔV * Vproj, :))
+        y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj)
         γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))
     end
-    γA1 = reshape(@view(γ[1:dimγ]), k, m)
-    γA2 = reshape(@view(γ[(dimγ + 1):end]), k, n)
-    ΔA += Uproj * γA1' * V + U * γA2 * Vproj
+    ΔA += Uproj * γ[1]' * V + U * γ[2] * Vproj
 
     return ΔA
 end

From 80db1c05d5d388dd73d48e1cc888030f4684e2ce Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Thu, 11 Apr 2024 12:23:23 +0200
Subject: [PATCH 05/27] Fix ZeroTangent case for linear problem

---
 src/utility/svd.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index ccca32d4..af9baa2a 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -121,7 +121,8 @@ function itersvd_rev(
     end
     if ΔU isa ZeroTangent && ΔV isa ZeroTangent
         m, k, n = size(U, 1), size(U, 2), size(V, 2)
-        γ = linsolve(Sop, (zeros(eltype(A), k * m), zeros(eltype(A), k * n)))
+        y = (zeros(eltype(A), k * m), zeros(eltype(A), k * n))
+        γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))
     else
         y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj)
         γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))

From ae96298e0b1201b2d39d8d3dc6129d99449fc9d9 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Thu, 20 Jun 2024 17:16:31 +0200
Subject: [PATCH 06/27] Add SVD wrapper structs and function, utilize tsvd
 machinery, convert SVD adjoint script to test

---
 examples/test_svd_adjoint.jl | 109 ------------------
 src/PEPSKit.jl               |   1 +
 src/utility/svd.jl           | 210 +++++++++++++++++++++++------------
 test/svdwrap.jl              |  63 +++++++++++
 4 files changed, 206 insertions(+), 177 deletions(-)
 delete mode 100644 examples/test_svd_adjoint.jl
 create mode 100644 test/svdwrap.jl

diff --git a/examples/test_svd_adjoint.jl b/examples/test_svd_adjoint.jl
deleted file mode 100644
index 69124870..00000000
--- a/examples/test_svd_adjoint.jl
+++ /dev/null
@@ -1,109 +0,0 @@
-using LinearAlgebra
-using TensorKit
-using ChainRulesCore, ChainRulesTestUtils, Zygote
-using PEPSKit
-
-# Truncated SVD with outdated adjoint
-oldsvd(t::AbstractTensorMap, χ::Int; kwargs...) = itersvd(t, χ; kwargs...)
-
-# Outdated adjoint not taking truncated part into account
-function ChainRulesCore.rrule(
-    ::typeof(oldsvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs...
-)
-    U, S, V = oldsvd(t, χ; kwargs...)
-
-    function oldsvd_pullback((ΔU, ΔS, ΔV))
-        ∂t = similar(t)
-        for (c, b) in blocks(∂t)
-            copyto!(
-                b,
-                oldsvd_rev(
-                    block(U, c),
-                    block(S, c),
-                    block(V, c),
-                    block(ΔU, c),
-                    block(ΔS, c),
-                    block(ΔV, c);
-                    εbroad,
-                ),
-            )
-        end
-        return NoTangent(), ∂t, NoTangent()
-    end
-
-    return (U, S, V), oldsvd_pullback
-end
-
-function oldsvd_rev(
-    U::AbstractMatrix,
-    S::AbstractMatrix,
-    V::AbstractMatrix,
-    ΔU,
-    ΔS,
-    ΔV;
-    εbroad=0,
-    atol::Real=0,
-    rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
-)
-    tol = atol > 0 ? atol : rtol * S[1, 1]
-    F = PEPSKit.invert_S²(S, tol; εbroad)  # Includes Lorentzian broadening
-    S⁻¹ = pinv(S; atol=tol)
-
-    # dS contribution
-    term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS))
-
-    # dU₁ and dV₁ off-diagonal contribution
-    J = F .* (U' * ΔU)
-    term += (J + J') * S
-    VΔV = (V * ΔV')
-    K = F .* VΔV
-    term += S * (K + K')
-
-    # dV₁ diagonal contribution (diagonal of dU₁ is gauged away)
-    if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent)
-        L = Diagonal(diag(VΔV))
-        term += 0.5 * S⁻¹ * (L' - L)
-    end
-    ΔA = U * term * V
-
-    # Projector contribution for non-square A
-    UUd = U * U'
-    VdV = V' * V
-    Uproj = one(UUd) - UUd
-    Vproj = one(VdV) - VdV
-    ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj  # Wrong truncation contribution
-
-    return ΔA
-end
-
-# Gauge-invariant loss function
-function lossfun(A, R=TensorMap(randn, space(A)), svdfunc=tsvd)
-    U, _, V = svdfunc(A)
-    return real(dot(R, U * V))  # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n
-end
-
-m, n = 20, 30
-dtype = ComplexF64
-χ = 15
-r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
-R = TensorMap(randn, space(r))
-
-println("Non-truncated SVD:")
-loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, min(m, n))), r)
-ltensorkit, gtensorkit = withgradient(
-    A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(min(m, n)))), r
-)
-litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, min(m, n))), r)
-@show loldsvd ≈ ltensorkit ≈ litersvd
-@show norm(gtensorkit[1] - goldsvd[1])
-@show norm(gtensorkit[1] - gitersvd[1])
-
-println("\nTruncated SVD with χ=$χ:")
-loldsvd, goldsvd = withgradient(A -> lossfun(A, R, x -> oldsvd(x, χ)), r)
-ltensorkit, gtensorkit = withgradient(
-    A -> lossfun(A, R, x -> tsvd(x; trunc=truncdim(χ))), r
-)
-litersvd, gitersvd = withgradient(A -> lossfun(A, R, x -> itersvd(x, χ)), r)
-@show loldsvd ≈ ltensorkit ≈ litersvd
-@show norm(gtensorkit[1] - goldsvd[1])
-@show norm(gtensorkit[1] - gitersvd[1])
diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index 456482e0..f195b882 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -42,6 +42,7 @@ module Defaults
     const tol = 1e-12
 end
 
+export FullSVD, IterSVD, OldSVD, svdwrap
 export InfinitePEPS, InfiniteTransferPEPS
 export InfinitePEPO, InfiniteTransferPEPO
 export initializeMPS, initializePEPS
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index af9baa2a..d06b27c8 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -1,97 +1,164 @@
-# Computation of F in SVD adjoint, including Lorentzian broadening
-function invert_S²(S::AbstractMatrix{T}, tol::Real; εbroad=0) where {T<:Real}
-    F = similar(S)
-    @inbounds for i in axes(F, 1), j in axes(F, 2)
-        F[i, j] = if i == j
-            zero(T)
-        else
-            sᵢ, sⱼ = S[i, i], S[j, j]
-            Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2
-            εbroad > 0 && (Δs = lorentz_broaden(Δs, εbroad))
-            1 / Δs
+import TensorKit:
+    SectorDict,
+    _empty_svdtensors,
+    _compute_svddata!,
+    _truncate!,
+    _implement_svdtruncation!,
+    _create_svdtensors
+
+# Plain copy of tsvd!(...) from TensorKit to lift alg type restriction
+function _tensorkit_svd!(
+    t::TensorMap;
+    trunc::TruncationScheme=TensorKit.NoTruncation(),
+    p::Real=2,
+    alg=TensorKit.SVD(),
+)
+    #early return
+    if isempty(blocksectors(t))
+        truncerr = zero(real(scalartype(t)))
+        return _empty_svdtensors(t)..., truncerr
+    end
+
+    S = spacetype(t)
+    Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg)
+    if !isa(trunc, TensorKit.NoTruncation)
+        Σdata, truncerr = _truncate!(Σdata, trunc, p)
+        Udata, Σdata, Vdata, dims = _implement_svdtruncation!(t, Udata, Σdata, Vdata, dims)
+        W = S(dims)
+    else
+        truncerr = abs(zero(scalartype(t)))
+        W = S(dims)
+        if length(domain(t)) == 1 && domain(t)[1] ≅ W
+            W = domain(t)[1]
+        elseif length(codomain(t)) == 1 && codomain(t)[1] ≅ W
+            W = codomain(t)[1]
         end
     end
-    return F
+    return _create_svdtensors(t, Udata, Σdata, Vdata, W)..., truncerr
 end
 
-# Lorentzian broadening for SVD adjoint singularities
-function lorentz_broaden(x::Real, ε=1e-12)
-    x′ = 1 / x
-    return x′ / (x′^2 + ε)
+# Wrapper struct around TensorKit's SVD algorithms
+@kwdef struct FullSVD
+    alg::Union{<:TensorKit.SVD,<:TensorKit.SDD} = TensorKit.SVD()
+    lorentz_broad::Float64 = 0.0
+end
+
+function svdwrap(t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...)
+    # TODO: Replace _tensorkit_svd! with just tsvd eventually to use the full TensorKit machinery
+    return _tensorkit_svd!(copy(t); trunc, alg.alg)
 end
 
-# Proper truncated SVD using iterative solver
-function itersvd(
-    t::AbstractTensorMap,
-    χ::Int;
-    εbroad=0,
-    solverkwargs=(; krylovdim=χ + 5, tol=1e2eps(real(scalartype(t)))),
+function ChainRulesCore.rrule(
+    ::typeof(svdwrap), t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...
 )
-    vals, lvecs, rvecs, info = svdsolve(t.data, dim(codomain(t)), χ; solverkwargs...)
-    truncspace = field(t)^χ
-    if info.converged < χ  # Fall back to dense SVD
-        @warn "falling back to dense SVD solver since length(S) < χ"
-        return tsvd(t; trunc=truncdim(χ), alg=TensorKit.SVD())
-    else
-        vals = @view(vals[1:χ])
-        lvecs = @view(lvecs[1:χ])
-        rvecs = @view(rvecs[1:χ])
+    tsvd_return, tsvd!_pullback = ChainRulesCore.rrule(tsvd!, t; alg=TensorKit.SVD(), trunc)
+    function svdwrap_fullsvd_pullback(Δ)
+        return tsvd!_pullback(Δ)..., NoTangent()
+    end
+    return tsvd_return, svdwrap_fullsvd_pullback
+end
+
+# Wrapper around Krylov Kit's GKL iterative SVD solver
+@kwdef struct IterSVD
+    alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25)
+    howmany::Int = 20
+    lorentz_broad::Float64 = 0.0
+    alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14)
+end
+
+function svdwrap(t::AbstractTensorMap, alg::IterSVD; trunc=notrunc(), kwargs...)
+    U, S, V, = _tensorkit_svd!(copy(t); trunc, alg)  # TODO: Also replace this with tsvd eventually
+    ϵ = norm(t - U * S * V)  # Compute truncation error separately
+    return U, S, V, ϵ
+end
+
+# Compute SVD data block-wise using KrylovKit algorithm
+function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD)
+    InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
+    I = sectortype(t)
+    A = storagetype(t)
+    Udata = SectorDict{I,A}()
+    Vdata = SectorDict{I,A}()
+    dims = SectorDict{I,Int}()
+    local Σdata
+    for (c, b) in blocks(t)
+        x₀ = randn(eltype(b), size(b, 1))
+        Σ, lvecs, rvecs, info = svdsolve(b, x₀, alg.howmany, :LR, alg.alg)
+        if info.converged < alg.howmany  # Fall back to dense SVD if not properly converged
+            U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
+            Udata[c] = U
+            Vdata[c] = V
+        else
+            Udata[c] = stack(lvecs)
+            Vdata[c] = stack(rvecs)'
+        end
+        if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
+            Σdata[c] = Σ
+        else
+            Σdata = SectorDict(c => Σ)
+        end
+        dims[c] = length(Σ)
     end
-    U = TensorMap(hcat(lvecs...), codomain(t) ← truncspace)
-    S = TensorMap(diagm(vals), truncspace ← truncspace)
-    V = TensorMap(copy(hcat(rvecs...)'), truncspace ← domain(t))
-    return U, S, V
+    return Udata, Σdata, Vdata, dims
+end
+
+# TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint
+
+# Full SVD with old adjoint that doesn't account for truncation properly
+@kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}}
+    alg::A = FullSVD()
+    lorentz_broad::Float64 = 0.0
+end
+
+function svdwrap(t::AbstractTensorMap, alg::OldSVD; kwargs...)
+    return svdwrap(t, alg.alg; kwargs...)
 end
 
-# Reverse rule adopted from tsvd! rrule as found in TensorKit.jl
+# Outdated adjoint not taking truncated part into account for testing purposes
 function ChainRulesCore.rrule(
-    ::typeof(itersvd), t::AbstractTensorMap, χ::Int; εbroad=0, kwargs...
+    ::typeof(svdwrap), t::AbstractTensorMap, alg::OldSVD; kwargs...
 )
-    U, S, V = itersvd(t, χ; kwargs...)
+    U, S, V, ϵ = svdwrap(t, alg; kwargs...)
 
-    function itersvd_pullback((ΔU, ΔS, ΔV))
+    function svdwrap_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         ∂t = similar(t)
         for (c, b) in blocks(∂t)
             copyto!(
                 b,
-                itersvd_rev(
-                    block(t, c),
+                oldsvd_rev(
                     block(U, c),
                     block(S, c),
                     block(V, c),
                     block(ΔU, c),
                     block(ΔS, c),
                     block(ΔV, c);
-                    εbroad,
+                    lorentz_broad=alg.lorentz_broad,
                 ),
             )
         end
         return NoTangent(), ∂t, NoTangent()
     end
 
-    return (U, S, V), itersvd_pullback
+    return (U, S, V, ϵ), svdwrap_oldsvd_pullback
 end
 
-# SVD adjoint with proper truncation
-function itersvd_rev(
-    A::AbstractMatrix,
+function oldsvd_rev(
     U::AbstractMatrix,
     S::AbstractMatrix,
     V::AbstractMatrix,
     ΔU,
     ΔS,
     ΔV;
-    εbroad=0,
+    lorentz_broad=0,
     atol::Real=0,
     rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
 )
-    Ad = copy(A')
     tol = atol > 0 ? atol : rtol * S[1, 1]
-    F = invert_S²(S, tol; εbroad)  # Includes Lorentzian broadening
+    F = _invert_S²(S, tol, lorentz_broad)  # Includes Lorentzian broadening
     S⁻¹ = pinv(S; atol=tol)
 
     # dS contribution
-    term = ΔS isa ZeroTangent ? ΔS : Diagonal(real.(ΔS))  # Implicitly performs 𝕀 ∘ dS
+    term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS))
 
     # dU₁ and dV₁ off-diagonal contribution
     J = F .* (U' * ΔU)
@@ -102,32 +169,39 @@ function itersvd_rev(
 
     # dV₁ diagonal contribution (diagonal of dU₁ is gauged away)
     if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent)
-        L = Diagonal(VΔV)  # Implicitly performs 𝕀 ∘ dV
+        L = Diagonal(diag(VΔV))
         term += 0.5 * S⁻¹ * (L' - L)
     end
     ΔA = U * term * V
 
-    # Projector contribution for non-square A and dU₂ and dV₂
+    # Projector contribution for non-square A
     UUd = U * U'
     VdV = V' * V
     Uproj = one(UUd) - UUd
     Vproj = one(VdV) - VdV
-
-    # Truncation contribution from dU₂ and dV₂
-    function svdlinprob(v)  # Left-preconditioned linear problem
-        Γ1 = v[1] - S⁻¹ * v[2] * Vproj * Ad
-        Γ2 = v[2] - S⁻¹ * v[1] * Uproj * A
-        return (Γ1, Γ2)
-    end
-    if ΔU isa ZeroTangent && ΔV isa ZeroTangent
-        m, k, n = size(U, 1), size(U, 2), size(V, 2)
-        y = (zeros(eltype(A), k * m), zeros(eltype(A), k * n))
-        γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))
-    else
-        y = (S⁻¹ * ΔU' * Uproj, S⁻¹ * ΔV * Vproj)
-        γ, = linsolve(svdlinprob, y; rtol=eps(real(eltype(A))))
-    end
-    ΔA += Uproj * γ[1]' * V + U * γ[2] * Vproj
+    ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj  # Wrong truncation contribution
 
     return ΔA
 end
+
+# Computation of F in SVD adjoint, including Lorentzian broadening
+function _invert_S²(S::AbstractMatrix{T}, tol::Real, ε=0) where {T<:Real}
+    F = similar(S)
+    @inbounds for i in axes(F, 1), j in axes(F, 2)
+        F[i, j] = if i == j
+            zero(T)
+        else
+            sᵢ, sⱼ = S[i, i], S[j, j]
+            Δs = abs(sⱼ - sᵢ) < tol ? tol : sⱼ^2 - sᵢ^2
+            ε > 0 && (Δs = _lorentz_broaden(Δs, ε))
+            1 / Δs
+        end
+    end
+    return F
+end
+
+# Lorentzian broadening for SVD adjoint F-singularities
+function _lorentz_broaden(x::Real, ε=1e-12)
+    x′ = 1 / x
+    return x′ / (x′^2 + ε)
+end
\ No newline at end of file
diff --git a/test/svdwrap.jl b/test/svdwrap.jl
new file mode 100644
index 00000000..3c3a0748
--- /dev/null
+++ b/test/svdwrap.jl
@@ -0,0 +1,63 @@
+using Test
+using Random
+using LinearAlgebra
+using TensorKit
+using KrylovKit
+using ChainRulesCore, Zygote
+using PEPSKit
+
+# Gauge-invariant loss function
+function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc())
+    U, _, V, = svdwrap(A, alg; trunc)
+    return real(dot(R, U * V))  # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n
+end
+
+m, n = 20, 30
+dtype = ComplexF64
+χ = 12
+trunc = truncdim(χ)
+# lorentz_broad = 1e-12
+adjoint_tol = 1e-16
+rtol = 1e-9
+r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
+R = TensorMap(randn, space(r))
+
+@testset "Non-truncacted SVD" begin
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r)
+    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r)
+    # l_itersvd, g_itersvd = withgradient(
+    #     A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r
+    # )
+
+    @test l_oldsvd ≈ l_fullsvd 
+    # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol
+    # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol
+end
+
+@testset "Truncated SVD with χ=$χ" begin
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r)
+    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r)
+    # l_itersvd, g_itersvd = withgradient(
+    #     A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r
+    # )
+
+    @test l_oldsvd ≈ l_fullsvd 
+    # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
+    # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol
+end
+
+# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin
+#     l_fullsvd, g_fullsvd = withgradient(
+#         A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r
+#     )
+#     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broad), R; trunc), r)
+#     l_itersvd, g_itersvd = withgradient(
+#         A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broad), R; trunc), r
+#     )
+
+#     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+#     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
+#     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
+# end

From 78ab1527b74c14e5df0ce3cd8db6e1a17b99ab53 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Thu, 20 Jun 2024 17:46:25 +0200
Subject: [PATCH 07/27] Copy ctmrg.jl from master, add svdalg field to CTMRG,
 use svdwrap in left_move

---
 src/algorithms/ctmrg.jl | 653 +++++++++++++++++++++++++---------------
 1 file changed, 403 insertions(+), 250 deletions(-)

diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 932ed866..65312e0c 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -1,172 +1,381 @@
-@with_kw struct CTMRG #<: Algorithm
+# TODO: add abstract Algorithm type?
+"""
+    struct CTMRG(; trscheme = TensorKit.notrunc(), tol = Defaults.ctmrg_tol,
+                 maxiter = Defaults.ctmrg_maxiter, miniter = Defaults.ctmrg_miniter,
+                 verbosity = 0, fixedspace = false)
+
+Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS.
+The projector bond dimensions are set via `trscheme` which controls the truncation
+properties inside of `TensorKit.tsvd`. Each CTMRG run is converged up to `tol`
+where the singular value convergence of the corners as well as the norm is checked.
+The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`.
+Different levels of output information are printed depending on `verbosity` (0, 1 or 2).
+Regardless of the truncation scheme, the space can be kept fixed with `fixedspace`.
+"""
+@kwdef struct CTMRG{S}
+    tol::Float64 = Defaults.ctmrg_tol
+    maxiter::Int = Defaults.ctmrg_maxiter
+    miniter::Int = Defaults.ctmrg_miniter
+    verbosity::Int = 0
+    svdalg::S = FullSVD()
     trscheme::TruncationScheme = TensorKit.notrunc()
-    tol::Float64 = Defaults.tol
-    maxiter::Integer = Defaults.maxiter
-    miniter::Integer = 4
-    verbose::Integer = 0
     fixedspace::Bool = false
 end
 
-@with_kw struct CTMRG2 #<: Algorithm
-    trscheme::TruncationScheme = TensorKit.notrunc()
-    tol::Float64 = Defaults.tol
-    maxiter::Integer = Defaults.maxiter
-    miniter::Integer = 4
-    verbose::Integer = 0
+"""
+    MPSKit.leading_boundary([envinit], state, alg::CTMRG)
+
+Contract `state` using CTMRG and return the CTM environment.
+Per default, a random initial environment is used.
+"""
+function MPSKit.leading_boundary(state, alg::CTMRG)
+    return MPSKit.leading_boundary(CTMRGEnv(state), state, alg)
 end
+function MPSKit.leading_boundary(envinit, state, alg::CTMRG)
+    normold = 1.0
+    CSold = map(x -> tsvd(x; alg=TensorKit.SVD())[2], envinit.corners)
+    TSold = map(x -> tsvd(x; alg=TensorKit.SVD())[2], envinit.edges)
+    ϵold = 1.0
+    env = deepcopy(envinit)
+
+    for i in 1:(alg.maxiter)
+        env, ϵ = ctmrg_iter(state, env, alg)  # Grow and renormalize in all 4 directions
+
+        # Compute convergence criteria and take max (TODO: How should we handle logging all of this?)
+        Δϵ = abs((ϵold - ϵ) / ϵold)
+        normnew = norm(state, env)
+        Δnorm = abs(normold - normnew) / abs(normold)
+        CSnew = map(c -> tsvd(c; alg=TensorKit.SVD())[2], env.corners)
+        ΔCS = maximum(zip(CSold, CSnew)) do (c_old, c_new)
+            # only compute the difference on the smallest part of the spaces
+            smallest = infimum(MPSKit._firstspace(c_old), MPSKit._firstspace(c_new))
+            e_old = isometry(MPSKit._firstspace(c_old), smallest)
+            e_new = isometry(MPSKit._firstspace(c_new), smallest)
+            return norm(e_new' * c_new * e_new - e_old' * c_old * e_old)
+        end
+        TSnew = map(t -> tsvd(t; alg=TensorKit.SVD())[2], env.edges)
 
-function MPSKit.leading_boundary(peps::InfinitePEPS, alg::CTMRG, envs=CTMRGEnv(peps))
-    return MPSKit.leading_boundary(peps, peps, alg, envs)
-end;
+        ΔTS = maximum(zip(TSold, TSnew)) do (t_old, t_new)
+            MPSKit._firstspace(t_old) == MPSKit._firstspace(t_new) ||
+                return scalartype(t_old)(Inf)
+            # TODO: implement when spaces aren't the same
+            return norm(t_new - t_old)
+        end
 
-function MPSKit.leading_boundary(
-    peps_above::InfinitePEPS,
-    peps_below::InfinitePEPS,
-    alg::CTMRG,
-    envs=CTMRGEnv(peps_above, peps_below),
-)
-    err = Inf
-    iter = 1
-
-    #for convergence criterium we use the on site contracted boundary
-    #this convergences, though the value depends on the bond dimension χ
-    old_norm = 1.0
-    new_norm = old_norm
-    ϵ₁ = 1.0
-    while (err > alg.tol && iter <= alg.maxiter) || iter <= alg.miniter
-        ϵ = 0.0
-        for i in 1:4
-            envs, ϵ₀ = left_move(peps_above, peps_below, alg, envs)
-            ϵ = max(ϵ, ϵ₀)
-            envs = rotate_north(envs, EAST)
-            peps_above = envs.peps_above
-            peps_below = envs.peps_below
+        conv_condition = max(Δnorm, ΔCS, ΔTS) < alg.tol && i > alg.miniter
+        ignore_derivatives() do # Print verbose info
+            if alg.verbosity > 1 || (alg.verbosity == 1 && (i == 1 || conv_condition))
+                @printf(
+                    "CTMRG iter: %3d   norm: %.2e   Δnorm: %.2e   ΔCS: %.2e   ΔTS: %.2e   ϵ: %.2e   Δϵ: %.2e\n",
+                    i,
+                    abs(normnew),
+                    Δnorm,
+                    ΔCS,
+                    ΔTS,
+                    ϵ,
+                    Δϵ
+                )
+            end
+            alg.verbosity > 0 &&
+                i == alg.maxiter &&
+                @warn(
+                    "CTMRG reached maximal number of iterations at (Δnorm=$Δnorm, ΔCS=$ΔCS, ΔTS=$ΔTS)"
+                )
+        end
+        conv_condition && break  # Converge if maximal Δ falls below tolerance
+
+        # Update convergence criteria
+        normold = normnew
+        CSold = CSnew
+        TSold = TSnew
+        ϵold = ϵ
+    end
+
+    # Do one final iteration that does not change the spaces
+    alg_fixed = CTMRG(;
+        alg.trscheme, alg.tol, alg.maxiter, alg.miniter, alg.verbosity, fixedspace=true
+    )
+    env′, = ctmrg_iter(state, env, alg_fixed)
+    envfix = gauge_fix(env, env′)
+    check_elementwise_convergence(env, envfix; atol=alg.tol^(3 / 4)) ||
+        @warn "CTMRG did not converge elementwise."
+    return envfix
+end
+
+"""
+    gauge_fix(envprev::CTMRGEnv{C,T}, envfinal::CTMRGEnv{C,T}) where {C,T}
+
+Fix the gauge of `envfinal` based on the previous environment `envprev`.
+This assumes that the `envfinal` is the result of one CTMRG iteration on `envprev`.
+Given that the CTMRG run is converged, the returned environment will be
+element-wise converged to `envprev`.
+"""
+function gauge_fix(envprev::CTMRGEnv{C,T}, envfinal::CTMRGEnv{C,T}) where {C,T}
+    # Check if spaces in envprev and envfinal are the same
+    same_spaces = map(Iterators.product(axes(envfinal.edges)...)) do (dir, r, c)
+        space(envfinal.edges[dir, r, c]) == space(envprev.edges[dir, r, c]) &&
+            space(envfinal.corners[dir, r, c]) == space(envprev.corners[dir, r, c])
+    end
+    @assert all(same_spaces) "Spaces of envprev and envfinal are not the same"
+
+    # Try the "general" algorithm from https://arxiv.org/abs/2311.11894
+    signs = map(Iterators.product(axes(envfinal.edges)...)) do (dir, r, c)
+        # Gather edge tensors and pretend they're InfiniteMPSs
+        if dir == NORTH
+            Tsprev = circshift(envprev.edges[dir, r, :], 1 - c)
+            Tsfinal = circshift(envfinal.edges[dir, r, :], 1 - c)
+        elseif dir == EAST
+            Tsprev = circshift(envprev.edges[dir, :, c], 1 - r)
+            Tsfinal = circshift(envfinal.edges[dir, :, c], 1 - r)
+        elseif dir == SOUTH
+            Tsprev = circshift(reverse(envprev.edges[dir, r, :]), c)
+            Tsfinal = circshift(reverse(envfinal.edges[dir, r, :]), c)
+        elseif dir == WEST
+            Tsprev = circshift(reverse(envprev.edges[dir, :, c]), r)
+            Tsfinal = circshift(reverse(envfinal.edges[dir, :, c]), r)
         end
 
-        new_norm = contract_ctrmg(envs)
-
-        err = abs(old_norm - new_norm)
-        dϵ = abs((ϵ₁ - ϵ) / ϵ₁)
-        @ignore_derivatives alg.verbose > 1 && @printf(
-            "CTMRG: \titeration: %4d\t\terror: %.2e\t\tnorm: %.10e\t\tϵ: %.2e\t\tdϵ: %.2e\n",
-            iter,
-            err,
-            abs(new_norm),
-            ϵ,
-            dϵ
+        # Random MPS of same bond dimension
+        M = map(Tsfinal) do t
+            TensorMap(randn, scalartype(t), codomain(t) ← domain(t))
+        end
+
+        # Find right fixed points of mixed transfer matrices
+        ρinit = TensorMap(
+            randn,
+            scalartype(T),
+            MPSKit._lastspace(Tsfinal[end])' ← MPSKit._lastspace(M[end])',
         )
+        ρprev = eigsolve(TransferMatrix(Tsprev, M), ρinit, 1, :LM)[2][1]
+        ρfinal = eigsolve(TransferMatrix(Tsfinal, M), ρinit, 1, :LM)[2][1]
+
+        # Decompose and multiply
+        Up, _, Vp = tsvd(ρprev)
+        Uf, _, Vf = tsvd(ρfinal)
+        Qprev = Up * Vp
+        Qfinal = Uf * Vf
+        σ = Qprev * Qfinal'
 
-        old_norm = new_norm
-        ϵ₁ = ϵ
-        iter += 1
+        return σ
     end
 
-    #@ignore_derivatives @show iter, new_norm, err
-    @ignore_derivatives iter > alg.maxiter &&
-        alg.verbose > 0 &&
-        @warn "maxiter $(alg.maxiter) reached: error was $(err)"
+    cornersfix, edgesfix = fix_relative_phases(envfinal, signs)
 
-    return envs
+    # Fix global phase
+    cornersgfix = map(zip(envprev.corners, cornersfix)) do (Cprev, Cfix)
+        φ = dot(Cprev, Cfix)
+        φ' * Cfix
+    end
+    edgesgfix = map(zip(envprev.edges, edgesfix)) do (Tprev, Tfix)
+        φ = dot(Tprev, Tfix)
+        φ' * Tfix
+    end
+    envfix = CTMRGEnv(cornersgfix, edgesgfix)
+
+    return envfix
 end
 
-function left_move(
-    peps_above::InfinitePEPS{PType},
-    peps_below::InfinitePEPS{PType},
-    alg::CTMRG,
-    envs::CTMRGEnv,
-) where {PType}
-    corners::typeof(envs.corners) = copy(envs.corners)
-    edges::typeof(envs.edges) = copy(envs.edges)
-
-    above_projector_type = tensormaptype(spacetype(PType), 1, 3, storagetype(PType))
-    below_projector_type = tensormaptype(spacetype(PType), 3, 1, storagetype(PType))
+# Explicit fixing of relative phases (doing this compactly in a loop is annoying)
+function fix_relative_phases(envfinal::CTMRGEnv, signs)
+    C1 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c)
+        @tensor Cfix[-1; -2] :=
+            signs[WEST, _prev(r, end), c][-1 1] *
+            envfinal.corners[NORTHWEST, r, c][1; 2] *
+            conj(signs[NORTH, r, c][-2 2])
+    end
+    T1 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c)
+        @tensor Tfix[-1 -2 -3; -4] :=
+            signs[NORTH, r, c][-1 1] *
+            envfinal.edges[NORTH, r, c][1 -2 -3; 2] *
+            conj(signs[NORTH, r, _next(c, end)][-4 2])
+    end
+
+    C2 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c)
+        @tensor Cfix[-1; -2] :=
+            signs[NORTH, r, _next(c, end)][-1 1] *
+            envfinal.corners[NORTHEAST, r, c][1; 2] *
+            conj(signs[EAST, r, c][-2 2])
+    end
+    T2 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c)
+        @tensor Tfix[-1 -2 -3; -4] :=
+            signs[EAST, r, c][-1 1] *
+            envfinal.edges[EAST, r, c][1 -2 -3; 2] *
+            conj(signs[EAST, _next(r, end), c][-4 2])
+    end
+
+    C3 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c)
+        @tensor Cfix[-1; -2] :=
+            signs[EAST, _next(r, end), c][-1 1] *
+            envfinal.corners[SOUTHEAST, r, c][1; 2] *
+            conj(signs[SOUTH, r, c][-2 2])
+    end
+    T3 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c)
+        @tensor Tfix[-1 -2 -3; -4] :=
+            signs[SOUTH, r, c][-1 1] *
+            envfinal.edges[SOUTH, r, c][1 -2 -3; 2] *
+            conj(signs[SOUTH, r, _prev(c, end)][-4 2])
+    end
+
+    C4 = map(Iterators.product(axes(envfinal.corners)[2:3]...)) do (r, c)
+        @tensor Cfix[-1; -2] :=
+            signs[SOUTH, r, _prev(c, end)][-1 1] *
+            envfinal.corners[SOUTHWEST, r, c][1; 2] *
+            conj(signs[WEST, r, c][-2 2])
+    end
+    T4 = map(Iterators.product(axes(envfinal.edges)[2:3]...)) do (r, c)
+        @tensor Tfix[-1 -2 -3; -4] :=
+            signs[WEST, r, c][-1 1] *
+            envfinal.edges[WEST, r, c][1 -2 -3; 2] *
+            conj(signs[WEST, _prev(r, end), c][-4 2])
+    end
+
+    return stack([C1, C2, C3, C4]; dims=1), stack([T1, T2, T3, T4]; dims=1)
+end
+
+"""
+    check_elementwise_convergence(envfinal, envfix; atol=1e-6)
+
+Check if the element-wise difference of the corner and edge tensors of the final and fixed
+CTMRG environments are below some tolerance.
+"""
+function check_elementwise_convergence(
+    envfinal::CTMRGEnv, envfix::CTMRGEnv; atol::Real=1e-6
+)
+    ΔC = envfinal.corners .- envfix.corners
+    ΔCmax = norm(ΔC, Inf)
+    ΔCmean = norm(ΔC)
+    @debug "maxᵢⱼ|Cⁿ⁺¹ - Cⁿ|ᵢⱼ = $ΔCmax   mean |Cⁿ⁺¹ - Cⁿ|ᵢⱼ = $ΔCmean"
+
+    ΔT = envfinal.edges .- envfix.edges
+    ΔTmax = norm(ΔT, Inf)
+    ΔTmean = norm(ΔT)
+    @debug "maxᵢⱼ|Tⁿ⁺¹ - Tⁿ|ᵢⱼ = $ΔTmax   mean |Tⁿ⁺¹ - Tⁿ|ᵢⱼ = $ΔTmean"
+
+    # Check differences for all tensors in unit cell to debug properly
+    for (dir, r, c) in Iterators.product(axes(envfinal.edges)...)
+        @debug(
+            "$((dir, r, c)): all |Cⁿ⁺¹ - Cⁿ|ᵢⱼ < ϵ: ",
+            all(x -> abs(x) < atol, convert(Array, ΔC[dir, r, c])),
+        )
+        @debug(
+            "$((dir, r, c)): all |Tⁿ⁺¹ - Tⁿ|ᵢⱼ < ϵ: ",
+            all(x -> abs(x) < atol, convert(Array, ΔT[dir, r, c])),
+        )
+    end
+
+    return isapprox(ΔCmax, 0; atol) && isapprox(ΔTmax, 0; atol)
+end
+
+@non_differentiable check_elementwise_convergence(args...)
+
+"""
+    ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
+    
+Perform one iteration of CTMRG that maps the `state` and `env` to a new environment,
+and also return the truncation error.
+One CTMRG iteration consists of four `left_move` calls and 90 degree rotations,
+such that the environment is grown and renormalized in all four directions.
+"""
+function ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
     ϵ = 0.0
-    n0 = 1.0
-    n1 = 1.0
-    for col in 1:size(peps_above, 2)
-        cop = mod1(col + 1, size(peps_above, 2))
-        com = mod1(col - 1, size(peps_above, 2))
-
-        above_projs = Vector{above_projector_type}(undef, size(peps_above, 1))
-        below_projs = Vector{below_projector_type}(undef, size(peps_above, 1))
-
-        # find all projectors
-        for row in 1:size(peps_above, 1)
-            rop = mod1(row + 1, size(peps_above, 1))
-            peps_above_nw = peps_above[row, col]
-            peps_above_sw = rotate_north(peps_above[rop, col], WEST)
-            peps_below_nw = peps_below[row, col]
-            peps_below_sw = rotate_north(peps_below[rop, col], WEST)
-
-            Q1 = northwest_corner(
-                envs.edges[SOUTH, mod1(row + 1, end), col],
-                envs.corners[SOUTHWEST, mod1(row + 1, end), col],
-                envs.edges[WEST, mod1(row + 1, end), col],
-                peps_above_sw,
-                peps_below_sw,
+
+    for _ in 1:4
+        env, _, _, ϵ₀ = left_move(state, env, alg)
+        state = rotate_north(state, EAST)
+        env = rotate_north(env, EAST)
+        ϵ = max(ϵ, ϵ₀)
+    end
+
+    return env, ϵ
+end
+
+"""
+    left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
+
+Grow, project and renormalize the environment `env` in west direction.
+Return the updated environment as well as the projectors and truncation error.
+"""
+function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
+    corners::typeof(env.corners) = copy(env.corners)
+    edges::typeof(env.edges) = copy(env.edges)
+    ϵ = 0.0
+    Pleft, Pright = Zygote.Buffer.(projector_type(T, size(state)))  # Use Zygote.Buffer instead of @diffset to avoid ZeroTangent errors in _setindex
+
+    for col in 1:size(state, 2)
+        cnext = _next(col, size(state, 2))
+
+        # Compute projectors
+        for row in 1:size(state, 1)
+            rnext = _next(row, size(state, 1))
+            state_nw = state[row, col]
+            state_sw = rotate_north(state[rnext, col], WEST)
+
+            # Enlarged corners
+            Q_sw = northwest_corner(
+                env.edges[SOUTH, _next(row, end), col],
+                env.corners[SOUTHWEST, _next(row, end), col],
+                env.edges[WEST, _next(row, end), col],
+                state_sw,
             )
-            Q2 = northwest_corner(
-                envs.edges[WEST, row, col],
-                envs.corners[NORTHWEST, row, col],
-                envs.edges[NORTH, row, col],
-                peps_above_nw,
-                peps_below_nw,
+            Q_nw = northwest_corner(
+                env.edges[WEST, row, col],
+                env.corners[NORTHWEST, row, col],
+                env.edges[NORTH, row, col],
+                state_nw,
             )
 
+            # SVD half-infinite environment
             trscheme = if alg.fixedspace == true
-                truncspace(space(envs.edges[WEST, row, cop], 1))
+                truncspace(space(env.edges[WEST, row, cnext], 1))
             else
                 alg.trscheme
             end
-            #@ignore_derivatives @show norm(Q1*Q2)
-
-            (U, S, V) = tsvd(Q1 * Q2; trunc=trscheme, alg=SVD())
-
-            @ignore_derivatives n0 = norm(Q1 * Q2)^2
-            @ignore_derivatives n1 = norm(U * S * V)^2
-            @ignore_derivatives ϵ = max(ϵ, (n0 - n1) / n0)
-
-            isqS = sdiag_inv_sqrt(S)
-            #Q = isqS*U'*Q1;
-            #P = Q2*V'*isqS;
-            @tensor Q[-1; -2 -3 -4] := isqS[-1; 1] * conj(U[2 3 4; 1]) * Q1[2 3 4; -2 -3 -4]
-            @tensor P[-1 -2 -3; -4] := Q2[-1 -2 -3; 1 2 3] * conj(V[4; 1 2 3]) * isqS[4; -4]
+            U, S, V, ϵ_local = svdwrap(Q_sw * Q_nw, alg.svdalg; trunc=trscheme)
+            ϵ = max(ϵ, ϵ_local / norm(S))
+            # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better
+
+            # Compute SVD truncation error and check for degenerate singular values
+            ignore_derivatives() do
+                if alg.verbosity > 0 && is_degenerate_spectrum(S)
+                    svals = TensorKit.SectorDict(c => diag(b) for (c, b) in blocks(S))
+                    @warn("degenerate singular values detected: ", svals)
+                end
+            end
 
-            @diffset above_projs[row] = Q
-            @diffset below_projs[row] = P
+            # Compute projectors
+            Pl, Pr = build_projectors(U, S, V, Q_sw, Q_nw)
+            Pleft[row, col] = Pl
+            Pright[row, col] = Pr
         end
 
-        #use the projectors to grow the corners/edges
-        for row in 1:size(peps_above, 1)
-            Q = above_projs[row]
-            P = below_projs[mod1(row - 1, end)]
-            rop = mod1(row + 1, size(peps_above, 1))
-            rom = mod1(row - 1, size(peps_above, 1))
-
-            @diffset @tensor corners[NORTHWEST, rop, cop][-1; -2] :=
-                envs.corners[NORTHWEST, rop, col][1, 2] *
-                envs.edges[NORTH, rop, col][2, 3, 4, -2] *
-                Q[-1; 1 3 4]
-            @diffset @tensor corners[SOUTHWEST, rom, cop][-1; -2] :=
-                envs.corners[SOUTHWEST, rom, col][1, 4] *
-                envs.edges[SOUTH, rom, col][-1, 2, 3, 1] *
-                P[4 2 3; -2]
-            @diffset @tensor edges[WEST, row, cop][-1 -2 -3; -4] :=
-                envs.edges[WEST, row, col][1 2 3; 4] *
-                peps_above[row, col][9; 5 -2 7 2] *
-                conj(peps_below[row, col][9; 6 -3 8 3]) *
-                P[4 5 6; -4] *
-                Q[-1; 1 7 8]
+        # Use projectors to grow the corners & edges
+        for row in 1:size(state, 1)
+            rprev = _prev(row, size(state, 1))
+            rnext = _next(row, size(state, 1))
+            C_sw, C_nw, T_w = grow_env_left(
+                state[row, col],
+                Pleft[_prev(row, end), col],
+                Pright[row, col],
+                env.corners[SOUTHWEST, rprev, col],
+                env.corners[NORTHWEST, rnext, col],
+                env.edges[SOUTH, rprev, col],
+                env.edges[WEST, row, col],
+                env.edges[NORTH, rnext, col],
+            )
+            @diffset corners[SOUTHWEST, rprev, cnext] = C_sw
+            @diffset corners[NORTHWEST, rnext, cnext] = C_nw
+            @diffset edges[WEST, row, cnext] = T_w
         end
 
-        @diffset corners[NORTHWEST, :, cop] ./= norm.(corners[NORTHWEST, :, cop])
-        @diffset edges[WEST, :, cop] ./= norm.(edges[WEST, :, cop])
-        @diffset corners[SOUTHWEST, :, cop] ./= norm.(corners[SOUTHWEST, :, cop])
+        @diffset corners[SOUTHWEST, :, cnext] ./= norm.(corners[SOUTHWEST, :, cnext])
+        @diffset corners[NORTHWEST, :, cnext] ./= norm.(corners[NORTHWEST, :, cnext])
+        @diffset edges[WEST, :, cnext] ./= norm.(edges[WEST, :, cnext])
     end
 
-    return CTMRGEnv(peps_above, peps_below, corners, edges), ϵ
+    return CTMRGEnv(corners, edges), copy(Pleft), copy(Pright), ϵ
 end
 
+# Compute enlarged NW corner
 function northwest_corner(E4, C1, E1, peps_above, peps_below=peps_above)
     @tensor corner[-1 -2 -3; -4 -5 -6] :=
         E4[-1 1 2; 3] *
@@ -175,6 +384,8 @@ function northwest_corner(E4, C1, E1, peps_above, peps_below=peps_above)
         peps_above[7; 5 -5 -2 1] *
         conj(peps_below[7; 6 -6 -3 2])
 end
+
+# Compute enlarged NE corner
 function northeast_corner(E1, C2, E2, peps_above, peps_below=peps_above)
     @tensor corner[-1 -2 -3; -4 -5 -6] :=
         E1[-1 1 2; 3] *
@@ -183,6 +394,8 @@ function northeast_corner(E1, C2, E2, peps_above, peps_below=peps_above)
         peps_above[7; 1 5 -5 -2] *
         conj(peps_below[7; 2 6 -6 -3])
 end
+
+# Compute enlarged SE corner
 function southeast_corner(E2, C3, E3, peps_above, peps_below=peps_above)
     @tensor corner[-1 -2 -3; -4 -5 -6] :=
         E2[-1 1 2; 3] *
@@ -192,130 +405,70 @@ function southeast_corner(E2, C3, E3, peps_above, peps_below=peps_above)
         conj(peps_below[7; -3 2 6 -6])
 end
 
-#=
-
-function MPSKit.leading_boundary(peps::InfinitePEPS,alg::CTMRG2,envs = CTMRGEnv(peps))
-    err = Inf
-    iter = 1
-
-    old_norm = 1.0
-
-    while (err > alg.tol && iter <= alg.maxiter) || iter<4
-
-        for dir in 1:4
-            envs = left_move(peps,alg,envs);
-
-            envs = rotate_north(envs,EAST);
-            peps = envs.peps;
-        end
-        new_norm = abs(contract_ctrmg(peps,envs,1,1))
-        #@show new_norm
-        err = abs(old_norm-new_norm)
-        @ignore_derivatives mod(alg.verbose,alg.miniter)==0 && mod(iter,alg.verbose+1)==0 &&  @info "$(iter) $(err) $(new_norm)"
-
-        old_norm = new_norm
-        iter += 1
-    end
-    @ignore_derivatives iter > alg.maxiter && @warn "maxiter $(alg.maxiter) reached: error was $(err)"
+# Build projectors from SVD and enlarged SW & NW corners
+function build_projectors(
+    U::AbstractTensorMap{E,3,1}, S, V::AbstractTensorMap{E,1,3}, Q_sw, Q_nw
+) where {E<:ElementarySpace}
+    isqS = sdiag_inv_sqrt(S)
+    @tensor Pl[-1 -2 -3; -4] := Q_nw[-1 -2 -3; 1 2 3] * conj(V[4; 1 2 3]) * isqS[4; -4]
+    @tensor Pr[-1; -2 -3 -4] := isqS[-1; 1] * conj(U[2 3 4; 1]) * Q_sw[2 3 4; -2 -3 -4]
+    return Pl, Pr
+end
 
-    envs
+# Apply projectors to entire left half-environment to grow SW & NW corners, and W edge
+function grow_env_left(peps, Pl, Pr, C_sw, C_nw, T_s, T_w, T_n)
+    @tensor C_sw′[-1; -2] := C_sw[1; 4] * T_s[-1 2 3; 1] * Pl[4 2 3; -2]
+    @tensor C_nw′[-1; -2] := C_nw[1; 2] * T_n[2 3 4; -2] * Pr[-1; 1 3 4]
+    @tensor T_w′[-1 -2 -3; -4] :=
+        T_w[1 2 3; 4] *
+        peps[9; 5 -2 7 2] *
+        conj(peps[9; 6 -3 8 3]) *
+        Pl[4 5 6; -4] *
+        Pr[-1; 1 7 8]
+    return C_sw′, C_nw′, T_w′
 end
 
-# the actual left_move is dependent on the type of ctmrg, so this seems natural
-function left_move(peps::InfinitePEPS{PType},alg::CTMRG2,envs::CTMRGEnv) where PType
-    corners::typeof(envs.corners) = copy(envs.corners);
-    edges::typeof(envs.edges) = copy(envs.edges);
-
-    above_projector_type = tensormaptype(spacetype(PType),1,3,storagetype(PType));
-    below_projector_type = tensormaptype(spacetype(PType),3,1,storagetype(PType));
-
-    for col in 1:size(peps,2)
-        cop = mod1(col+1, size(peps,2))
-        above_projs = Vector{above_projector_type}(undef,size(peps,1));
-        below_projs = Vector{below_projector_type}(undef,size(peps,1));
-
-        # find all projectors
-        for row in 1:size(peps,1)
-            rop = mod1(row+1, size(peps,1))
-            peps_nw = peps[row,col];
-            peps_sw = rotate_north(peps[rop,col],WEST);
-
-            Q1 = northwest_corner(envs.edges[WEST,row,col],envs.corners[NORTHWEST,row,col],  envs.edges[NORTH,row,col],peps_nw);
-            Q2 = northeast_corner(envs.edges[NORTH,row,cop],envs.corners[NORTHEAST,row,cop],envs.edges[EAST,row,cop],peps[row,cop])
-            Q3 = southeast_corner(envs.edges[EAST,rop,cop],envs.corners[SOUTHEAST,rop,cop],envs.edges[SOUTH,rop,cop],peps[rop,cop])
-            Q4 = northwest_corner(envs.edges[SOUTH,rop,col],envs.corners[SOUTHWEST,rop,col],envs.edges[WEST,rop,col],peps_sw);
-            Qnorth = Q1*Q2
-            Qsouth = Q3*Q4
-            (U,S,V) = tsvd(Qsouth*Qnorth, trunc = alg.trscheme);
-            #@ignore_derivatives @show ϵ = real(norm(Qsouth*Qnorth)^2-norm(U*S*V)^2) 
-            #@ignore_derivatives @info ϵ
-            isqS = sdiag_inv_sqrt(S);
-            Q = isqS*U'*Qsouth;
-            P = Qnorth*V'*isqS;
-
-            @diffset above_projs[row] = Q;
-            @diffset below_projs[row] = P;
-        end
+@doc """
+    LinearAlgebra.norm(peps::InfinitePEPS, env::CTMRGEnv)
 
-        #use the projectors to grow the corners/edges
-        for row in 1:size(peps,1)
-            Q = above_projs[row];
-            P = below_projs[mod1(row-1,end)];
-
-            @diffset @tensor corners[NORTHWEST,row+1,col+1][-1;-2] := envs.corners[NORTHWEST,row+1,col][1,2] * envs.edges[NORTH,row+1,col][2,3,4,-2]*Q[-1;1 3 4]
-            @diffset @tensor corners[SOUTHWEST,row-1,col+1][-1;-2] := envs.corners[SOUTHWEST,row-1,col][1,4] * envs.edges[SOUTH,row-1,col][-1,2,3,1]*P[4 2 3;-2]
-            @diffset @tensor edges[WEST,row,col+1][-1 -2 -3;-4] := envs.edges[WEST,row,col][1 2 3;4]*
-            peps[row,col][9;5 -2 7 2]*
-            conj(peps[row,col][9;6 -3 8 3])*
-            P[4 5 6;-4]*
-            Q[-1;1 7 8]
-        end
+Compute the norm of a PEPS contracted with a CTM environment.
+"""
 
-        @diffset corners[NORTHWEST,:,col+1]./=norm.(corners[NORTHWEST,:,col+1]);
-        @diffset corners[SOUTHWEST,:,col+1]./=norm.(corners[SOUTHWEST,:,col+1]);
-        @diffset edges[WEST,:,col+1]./=norm.(edges[WEST,:,col+1]);
-    end
+function LinearAlgebra.norm(peps::InfinitePEPS, env::CTMRGEnv)
+    total = one(scalartype(peps))
 
-    CTMRGEnv(peps,corners,edges);
-end
-=#
+    for r in 1:size(peps, 1), c in 1:size(peps, 2)
+        total *= @tensor env.edges[WEST, r, c][1 2 3; 4] *
+            env.corners[NORTHWEST, r, c][4; 5] *
+            env.edges[NORTH, r, c][5 6 7; 8] *
+            env.corners[NORTHEAST, r, c][8; 9] *
+            env.edges[EAST, r, c][9 10 11; 12] *
+            env.corners[SOUTHEAST, r, c][12; 13] *
+            env.edges[SOUTH, r, c][13 14 15; 16] *
+            env.corners[SOUTHWEST, r, c][16; 1] *
+            peps[r, c][17; 6 10 14 2] *
+            conj(peps[r, c][17; 7 11 15 3])
 
-function contract_ctrmg(
-    envs::CTMRGEnv, peps_above=envs.peps_above, peps_below=envs.peps_below
-)
-    total = 1.0 + 0im
-
-    for r in 1:size(peps_above, 1), c in 1:size(peps_above, 2)
-        total *= @tensor envs.edges[WEST, r, c][1 2 3; 4] *
-            envs.corners[NORTHWEST, r, c][4; 5] *
-            envs.edges[NORTH, r, c][5 6 7; 8] *
-            envs.corners[NORTHEAST, r, c][8; 9] *
-            envs.edges[EAST, r, c][9 10 11; 12] *
-            envs.corners[SOUTHEAST, r, c][12; 13] *
-            envs.edges[SOUTH, r, c][13 14 15; 16] *
-            envs.corners[SOUTHWEST, r, c][16; 1] *
-            peps_above[r, c][17; 6 10 14 2] *
-            conj(peps_below[r, c][17; 7 11 15 3])
         total *= tr(
-            envs.corners[NORTHWEST, r, c] *
-            envs.corners[NORTHEAST, r, mod1(c - 1, end)] *
-            envs.corners[SOUTHEAST, mod1(r - 1, end), mod1(c - 1, end)] *
-            envs.corners[SOUTHWEST, mod1(r - 1, end), c],
+            env.corners[NORTHWEST, r, c] *
+            env.corners[NORTHEAST, r, mod1(c - 1, end)] *
+            env.corners[SOUTHEAST, mod1(r - 1, end), mod1(c - 1, end)] *
+            env.corners[SOUTHWEST, mod1(r - 1, end), c],
         )
 
-        total /= @tensor envs.edges[WEST, r, c][1 10 11; 4] *
-            envs.corners[NORTHWEST, r, c][4; 5] *
-            envs.corners[NORTHEAST, r, mod1(c - 1, end)][5; 6] *
-            envs.edges[EAST, r, mod1(c - 1, end)][6 10 11; 7] *
-            envs.corners[SOUTHEAST, r, mod1(c - 1, end)][7; 8] *
-            envs.corners[SOUTHWEST, r, c][8; 1]
-
-        total /= @tensor envs.corners[NORTHWEST, r, c][1; 2] *
-            envs.edges[NORTH, r, c][2 10 11; 3] *
-            envs.corners[NORTHEAST, r, c][3; 4] *
-            envs.corners[SOUTHEAST, mod1(r - 1, end), c][4; 5] *
-            envs.edges[SOUTH, mod1(r - 1, end), c][5 10 11; 6] *
-            envs.corners[SOUTHWEST, mod1(r - 1, end), c][6; 1]
+        total /= @tensor env.edges[WEST, r, c][1 10 11; 4] *
+            env.corners[NORTHWEST, r, c][4; 5] *
+            env.corners[NORTHEAST, r, mod1(c - 1, end)][5; 6] *
+            env.edges[EAST, r, mod1(c - 1, end)][6 10 11; 7] *
+            env.corners[SOUTHEAST, r, mod1(c - 1, end)][7; 8] *
+            env.corners[SOUTHWEST, r, c][8; 1]
+
+        total /= @tensor env.corners[NORTHWEST, r, c][1; 2] *
+            env.edges[NORTH, r, c][2 10 11; 3] *
+            env.corners[NORTHEAST, r, c][3; 4] *
+            env.corners[SOUTHEAST, mod1(r - 1, end), c][4; 5] *
+            env.edges[SOUTH, mod1(r - 1, end), c][5 10 11; 6] *
+            env.corners[SOUTHWEST, mod1(r - 1, end), c][6; 1]
     end
 
     return total

From fc6600ecc6d7ef5bd3d3f1f3497e8eb147212d68 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Thu, 4 Jul 2024 18:03:00 +0200
Subject: [PATCH 08/27] Use KrylovKit implementation of eigsolve instead of
 eigsolve.jl, delete svdwrap wrapper and instead directly use tsvd, add
 KrylovKit compat entry

---
 Project.toml                              |   2 +-
 src/PEPSKit.jl                            |   3 +-
 src/algorithms/ctmrg.jl                   |   4 +-
 src/utility/eigsolve.jl                   | 251 ----------------------
 src/utility/svd.jl                        | 155 +++++--------
 test/{svdwrap.jl => ctmrg/svd_wrapper.jl} |  24 +--
 test/runtests.jl                          |   3 +
 7 files changed, 72 insertions(+), 370 deletions(-)
 delete mode 100644 src/utility/eigsolve.jl
 rename test/{svdwrap.jl => ctmrg/svd_wrapper.jl} (74%)

diff --git a/Project.toml b/Project.toml
index f8b9f357..98a70c11 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,7 +22,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 Accessors = "0.1"
 ChainRulesCore = "1.0"
 Compat = "3.46, 4.2"
-KrylovKit = "0.6, 0.7, 0.8"
+KrylovKit = "0.8"
 LinearAlgebra = "1"
 MPSKit = "0.10"
 OptimKit = "0.3"
diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index f0cbc7fb..1ee02e1f 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -10,7 +10,6 @@ using ChainRulesCore, Zygote
 
 include("utility/util.jl")
 include("utility/svd.jl")
-include("utility/eigsolve.jl")
 include("utility/rotations.jl")
 include("utility/hook_pullback.jl")
 
@@ -59,7 +58,7 @@ module Defaults
     const fpgrad_tol = 1e-6
 end
 
-export FullSVD, IterSVD, OldSVD, svdwrap, itersvd
+export FullSVD, IterSVD, OldSVD
 export CTMRG, CTMRGEnv
 export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor
 export expectation_value, costfun
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index a69d6da9..70cbe133 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -17,7 +17,7 @@ Regardless of the truncation scheme, the space can be kept fixed with `fixedspac
     maxiter::Int = Defaults.ctmrg_maxiter
     miniter::Int = Defaults.ctmrg_miniter
     verbosity::Int = 0
-    svdalg::S = FullSVD()
+    svdalg::S = TensorKit.SVD()
     trscheme::TruncationScheme = TensorKit.notrunc()
     fixedspace::Bool = false
 end
@@ -336,7 +336,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
                 alg.trscheme
             end
             @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6]
-            U, S, V, ϵ_local = svdwrap(QQ, alg.svdalg; trunc=trscheme)
+            U, S, V, ϵ_local = tsvd(QQ; trunc=trscheme, alg=alg.svdalg)
             ϵ = max(ϵ, ϵ_local / norm(S))
             # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better
 
diff --git a/src/utility/eigsolve.jl b/src/utility/eigsolve.jl
deleted file mode 100644
index 975adf49..00000000
--- a/src/utility/eigsolve.jl
+++ /dev/null
@@ -1,251 +0,0 @@
-# Copied from Jutho/KrylovKit.jl/pull/56, with minor tweaks
-
-function ChainRulesCore.rrule(
-    ::typeof(eigsolve), A::AbstractMatrix, x₀, howmany, which, alg
-)
-    (vals, vecs, info) = eigsolve(A, x₀, howmany, which, alg)
-    project_A = ProjectTo(A)
-    T = eltype(vecs[1]) # will be real for real symmetric problems and complex otherwise
-
-    function eigsolve_pullback(ΔX)
-        _Δvals = unthunk(ΔX[1])
-        _Δvecs = unthunk(ΔX[2])
-
-        ∂self = NoTangent()
-        ∂x₀ = ZeroTangent()
-        ∂howmany = NoTangent()
-        ∂which = NoTangent()
-        ∂alg = NoTangent()
-        if _Δvals isa AbstractZero && _Δvecs isa AbstractZero
-            ∂A = ZeroTangent()
-            return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg
-        end
-
-        if _Δvals isa AbstractZero
-            Δvals = fill(NoTangent(), length(Δvecs))
-        else
-            Δvals = _Δvals
-        end
-        if _Δvecs isa AbstractZero
-            Δvecs = fill(NoTangent(), length(Δvals))
-        else
-            Δvecs = _Δvecs
-        end
-
-        @assert length(Δvals) == length(Δvecs)
-        @assert length(Δvals) <= length(vals)
-
-        # Determine algorithm to solve linear problem
-        # TODO: Is there a better choice? Should we make this user configurable?
-        linalg = GMRES(;
-            tol=alg.tol, krylovdim=alg.krylovdim, maxiter=alg.maxiter, orth=alg.orth
-        )
-
-        ws = similar(vecs, length(Δvecs))
-        for i in 1:length(Δvecs)
-            Δλ = Δvals[i]
-            Δv = Δvecs[i]
-            λ = vals[i]
-            v = vecs[i]
-
-            # First threat special cases
-            if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution
-                ws[i] = Δv # some kind of zero
-                continue
-            end
-            if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution
-                ws[i] = Δλ * v
-                continue
-            end
-
-            # General case :
-            if isa(Δv, AbstractZero)
-                b = RecursiveVec(zero(T) * v, T[Δλ])
-            else
-                @assert isa(Δv, typeof(v))
-                b = RecursiveVec(Δv, T[Δλ])
-            end
-
-            if i > 1 &&
-                eltype(A) <: Real &&
-                vals[i] == conj(vals[i - 1]) &&
-                Δvals[i] == conj(Δvals[i - 1]) &&
-                vecs[i] == conj(vecs[i - 1]) &&
-                Δvecs[i] == conj(Δvecs[i - 1])
-                ws[i] = conj(ws[i - 1])
-                continue
-            end
-
-            w, reverse_info = let λ = λ, v = v, Aᴴ = A'
-                linsolve(b, zero(T) * b, linalg) do x
-                    x1, x2 = x
-                    γ = 1
-                    # γ can be chosen freely and does not affect the solution theoretically
-                    # The current choice guarantees that the extended matrix is Hermitian if A is
-                    # TODO: is this the best choice in all cases?
-                    y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, A' * x1))
-                    y2 = T[-dot(v, x1)]
-                    return RecursiveVec(y1, y2)
-                end
-            end
-            if info.converged >= i && reverse_info.converged == 0
-                @warn "The cotangent linear problem did not converge, whereas the primal eigenvalue problem did."
-            end
-            ws[i] = w[1]
-        end
-
-        if A isa StridedMatrix
-            ∂A = InplaceableThunk(
-                Ā -> _buildĀ!(Ā, ws, vecs), @thunk(_buildĀ!(zero(A), ws, vecs))
-            )
-        else
-            ∂A = @thunk(project_A(_buildĀ!(zero(A), ws, vecs)))
-        end
-        return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg
-    end
-    return (vals, vecs, info), eigsolve_pullback
-end
-
-function _buildĀ!(Ā, ws, vs)
-    for i in 1:length(ws)
-        w = ws[i]
-        v = vs[i]
-        if !(w isa AbstractZero)
-            if eltype(Ā) <: Real && eltype(w) <: Complex
-                mul!(Ā, _realview(w), _realview(v)', -1, 1)
-                mul!(Ā, _imagview(w), _imagview(v)', -1, 1)
-            else
-                mul!(Ā, w, v', -1, 1)
-            end
-        end
-    end
-    return Ā
-end
-function _realview(v::AbstractVector{Complex{T}}) where {T}
-    return view(reinterpret(T, v), 2 * (1:length(v)) .- 1)
-end
-function _imagview(v::AbstractVector{Complex{T}}) where {T}
-    return view(reinterpret(T, v), 2 * (1:length(v)))
-end
-
-function ChainRulesCore.rrule(
-    config::RuleConfig{>:HasReverseMode},
-    ::typeof(eigsolve),
-    A::AbstractMatrix,
-    x₀,
-    howmany,
-    which,
-    alg,
-)
-    return ChainRulesCore.rrule(eigsolve, A, x₀, howmany, which, alg)
-end
-
-function ChainRulesCore.rrule(
-    config::RuleConfig{>:HasReverseMode}, ::typeof(eigsolve), f, x₀, howmany, which, alg
-)
-    (vals, vecs, info) = eigsolve(f, x₀, howmany, which, alg)
-    resize!(vecs, howmany)
-    resize!(vals, howmany)
-    T = typeof(dot(vecs[1], vecs[1]))
-    f_pullbacks = map(x -> rrule_via_ad(config, f, x)[2], vecs)
-    function eigsolve_pullback(ΔX)
-        _Δvals = unthunk(ΔX[1])
-        _Δvecs = unthunk(ΔX[2])
-
-        ∂self = NoTangent()
-        ∂x₀ = ZeroTangent()
-        ∂howmany = NoTangent()
-        ∂which = NoTangent()
-        ∂alg = NoTangent()
-        if _Δvals isa AbstractZero && _Δvecs isa AbstractZero
-            ∂A = ZeroTangent()
-            return (∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg)
-        end
-
-        if _Δvals isa AbstractZero
-            Δvals = fill(NoTangent(), howmany)
-        else
-            Δvals = _Δvals
-        end
-        if _Δvecs isa AbstractZero
-            Δvecs = fill(NoTangent(), howmany)
-        else
-            Δvecs = _Δvecs
-        end
-
-        # filter ZeroTangents, added compared to Jutho/KrylovKit.jl/pull/56
-        Δvecs = filter(x -> !(x isa AbstractZero), Δvecs)
-        @assert length(Δvals) == length(Δvecs)
-
-        # Determine algorithm to solve linear problem
-        # TODO: Is there a better choice? Should we make this user configurable?
-        linalg = GMRES(;
-            tol=alg.tol,
-            krylovdim=alg.krylovdim + 10,
-            maxiter=alg.maxiter * 10,
-            orth=alg.orth,
-        )
-        # linalg = BiCGStab(;
-        #     tol = alg.tol,
-        #     maxiter = alg.maxiter*alg.krylovdim,
-        # )
-
-        ws = similar(Δvecs)
-        for i in 1:length(Δvecs)
-            Δλ = Δvals[i]
-            Δv = Δvecs[i]
-            λ = vals[i]
-            v = vecs[i]
-
-            # First threat special cases
-            if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution
-                ws[i] = 0 * v # some kind of zero
-                continue
-            end
-            if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution
-                ws[i] = Δλ * v
-                continue
-            end
-
-            # General case :
-            if isa(Δv, AbstractZero)
-                b = RecursiveVec(zero(T) * v, T[-Δλ])
-            else
-                @assert isa(Δv, typeof(v))
-                b = RecursiveVec(-Δv, T[-Δλ])
-            end
-
-            # TODO: is there any analogy to this for general vector-like user types
-            # if i > 1 && eltype(A) <: Real &&
-            #     vals[i] == conj(vals[i-1]) && Δvals[i] == conj(Δvals[i-1]) &&
-            #     vecs[i] == conj(vecs[i-1]) && Δvecs[i] == conj(Δvecs[i-1])
-            #
-            #     ws[i] = conj(ws[i-1])
-            #     continue
-            # end
-
-            w, reverse_info = let λ = λ, v = v, fᴴ = x -> f_pullbacks[i](x)[2]
-                linsolve(b, zero(T) * b, linalg) do x
-                    x1, x2 = x
-                    γ = 1
-                    # γ can be chosen freely and does not affect the solution theoretically
-                    # The current choice guarantees that the extended matrix is Hermitian if A is
-                    # TODO: is this the best choice in all cases?
-                    y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, fᴴ(x1)))
-                    y2 = T[-dot(v, x1)]
-                    return RecursiveVec(y1, y2)
-                end
-            end
-            if info.converged >= i && reverse_info.converged == 0
-                @warn "The cotangent linear problem ($i) did not converge, whereas the primal eigenvalue problem did." reverse_info b
-            end
-            ws[i] = w[1]
-        end
-        ∂f = f_pullbacks[1](ws[1])[1]
-        for i in 2:length(ws)
-            ∂f = VectorInterface.add!!(∂f, f_pullbacks[i](ws[i])[1])
-        end
-        return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg
-    end
-    return (vals, vecs, info), eigsolve_pullback
-end
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index d06b27c8..ebcc9784 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -1,108 +1,56 @@
-import TensorKit:
-    SectorDict,
-    _empty_svdtensors,
-    _compute_svddata!,
-    _truncate!,
-    _implement_svdtruncation!,
-    _create_svdtensors
-
-# Plain copy of tsvd!(...) from TensorKit to lift alg type restriction
-function _tensorkit_svd!(
-    t::TensorMap;
-    trunc::TruncationScheme=TensorKit.NoTruncation(),
-    p::Real=2,
-    alg=TensorKit.SVD(),
-)
-    #early return
-    if isempty(blocksectors(t))
-        truncerr = zero(real(scalartype(t)))
-        return _empty_svdtensors(t)..., truncerr
-    end
-
-    S = spacetype(t)
-    Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg)
-    if !isa(trunc, TensorKit.NoTruncation)
-        Σdata, truncerr = _truncate!(Σdata, trunc, p)
-        Udata, Σdata, Vdata, dims = _implement_svdtruncation!(t, Udata, Σdata, Vdata, dims)
-        W = S(dims)
-    else
-        truncerr = abs(zero(scalartype(t)))
-        W = S(dims)
-        if length(domain(t)) == 1 && domain(t)[1] ≅ W
-            W = domain(t)[1]
-        elseif length(codomain(t)) == 1 && codomain(t)[1] ≅ W
-            W = codomain(t)[1]
-        end
-    end
-    return _create_svdtensors(t, Udata, Σdata, Vdata, W)..., truncerr
-end
-
-# Wrapper struct around TensorKit's SVD algorithms
-@kwdef struct FullSVD
-    alg::Union{<:TensorKit.SVD,<:TensorKit.SDD} = TensorKit.SVD()
-    lorentz_broad::Float64 = 0.0
-end
-
-function svdwrap(t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...)
-    # TODO: Replace _tensorkit_svd! with just tsvd eventually to use the full TensorKit machinery
-    return _tensorkit_svd!(copy(t); trunc, alg.alg)
-end
-
-function ChainRulesCore.rrule(
-    ::typeof(svdwrap), t::AbstractTensorMap, alg::FullSVD; trunc=notrunc(), kwargs...
-)
-    tsvd_return, tsvd!_pullback = ChainRulesCore.rrule(tsvd!, t; alg=TensorKit.SVD(), trunc)
-    function svdwrap_fullsvd_pullback(Δ)
-        return tsvd!_pullback(Δ)..., NoTangent()
-    end
-    return tsvd_return, svdwrap_fullsvd_pullback
-end
+using TensorKit: SectorDict
 
 # Wrapper around Krylov Kit's GKL iterative SVD solver
 @kwdef struct IterSVD
     alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25)
-    howmany::Int = 20
     lorentz_broad::Float64 = 0.0
     alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14)
 end
 
-function svdwrap(t::AbstractTensorMap, alg::IterSVD; trunc=notrunc(), kwargs...)
-    U, S, V, = _tensorkit_svd!(copy(t); trunc, alg)  # TODO: Also replace this with tsvd eventually
-    ϵ = norm(t - U * S * V)  # Compute truncation error separately
-    return U, S, V, ϵ
+# Compute SVD data block-wise using KrylovKit algorithm
+function TensorKit._tsvd!(t, alg::IterSVD, trunc::TruncationScheme, p::Real=2)
+    # TODO
 end
 
-# Compute SVD data block-wise using KrylovKit algorithm
-function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD)
-    InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
-    I = sectortype(t)
-    A = storagetype(t)
-    Udata = SectorDict{I,A}()
-    Vdata = SectorDict{I,A}()
-    dims = SectorDict{I,Int}()
-    local Σdata
-    for (c, b) in blocks(t)
-        x₀ = randn(eltype(b), size(b, 1))
-        Σ, lvecs, rvecs, info = svdsolve(b, x₀, alg.howmany, :LR, alg.alg)
-        if info.converged < alg.howmany  # Fall back to dense SVD if not properly converged
-            U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
-            Udata[c] = U
-            Vdata[c] = V
-        else
-            Udata[c] = stack(lvecs)
-            Vdata[c] = stack(rvecs)'
-        end
-        if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
-            Σdata[c] = Σ
-        else
-            Σdata = SectorDict(c => Σ)
-        end
-        dims[c] = length(Σ)
-    end
-    return Udata, Σdata, Vdata, dims
+# function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD)
+#     InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
+#     I = sectortype(t)
+#     A = storagetype(t)
+#     Udata = SectorDict{I,A}()
+#     Vdata = SectorDict{I,A}()
+#     dims = SectorDict{I,Int}()
+#     local Σdata
+#     for (c, b) in blocks(t)
+#         x₀ = randn(eltype(b), size(b, 1))
+#         Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, alg.howmany, :LR, alg.alg)
+#         if info.converged < alg.howmany  # Fall back to dense SVD if not properly converged
+#             U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
+#             Udata[c] = U
+#             Vdata[c] = V
+#         else
+#             Udata[c] = stack(lvecs)
+#             Vdata[c] = stack(rvecs)'
+#         end
+#         if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
+#             Σdata[c] = Σ
+#         else
+#             Σdata = SectorDict(c => Σ)
+#         end
+#         dims[c] = length(Σ)
+#     end
+#     return Udata, Σdata, Vdata, dims
+# end
+
+function ChainRulesCore.rrule(
+    ::typeof(TensorKit.tsvd),
+    t::AbstractTensorMap;
+    trunc::TruncationScheme=notrunc(),
+    p::Real=2,
+    alg::IterSVD=IterSVD(),
+)
+    # TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint
 end
 
-# TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint
 
 # Full SVD with old adjoint that doesn't account for truncation properly
 @kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}}
@@ -110,17 +58,22 @@ end
     lorentz_broad::Float64 = 0.0
 end
 
-function svdwrap(t::AbstractTensorMap, alg::OldSVD; kwargs...)
-    return svdwrap(t, alg.alg; kwargs...)
+# Perform TensorKit.SVD in forward pass 
+function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2)
+    return TensorKit._tsvd(t, TensorKit.SVD(), trunc, p)
 end
 
-# Outdated adjoint not taking truncated part into account for testing purposes
+# Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
-    ::typeof(svdwrap), t::AbstractTensorMap, alg::OldSVD; kwargs...
+    ::typeof(TensorKit.tsvd),
+    t::AbstractTensorMap;
+    trunc::TruncationScheme=notrunc(),
+    p::Real=2,
+    alg::OldSVD=OldSVD(),
 )
-    U, S, V, ϵ = svdwrap(t, alg; kwargs...)
+    U, S, V, ϵ = tsvd(t; trunc, p, alg)
 
-    function svdwrap_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         ∂t = similar(t)
         for (c, b) in blocks(∂t)
             copyto!(
@@ -139,7 +92,7 @@ function ChainRulesCore.rrule(
         return NoTangent(), ∂t, NoTangent()
     end
 
-    return (U, S, V, ϵ), svdwrap_oldsvd_pullback
+    return (U, S, V, ϵ), tsvd_oldsvd_pullback
 end
 
 function oldsvd_rev(
@@ -204,4 +157,4 @@ end
 function _lorentz_broaden(x::Real, ε=1e-12)
     x′ = 1 / x
     return x′ / (x′^2 + ε)
-end
\ No newline at end of file
+end
diff --git a/test/svdwrap.jl b/test/ctmrg/svd_wrapper.jl
similarity index 74%
rename from test/svdwrap.jl
rename to test/ctmrg/svd_wrapper.jl
index 3c3a0748..ab0989df 100644
--- a/test/svdwrap.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -8,7 +8,7 @@ using PEPSKit
 
 # Gauge-invariant loss function
 function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc())
-    U, _, V, = svdwrap(A, alg; trunc)
+    U, _, V, = tsvd(A; trunc, alg)
     return real(dot(R, U * V))  # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n
 end
 
@@ -25,27 +25,25 @@ R = TensorMap(randn, space(r))
 @testset "Non-truncacted SVD" begin
     l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r)
     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r)
-    # l_itersvd, g_itersvd = withgradient(
-    #     A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r
-    # )
+    l_itersvd, g_itersvd = withgradient(
+        A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r
+    )
 
-    @test l_oldsvd ≈ l_fullsvd 
-    # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol
-    # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol
+    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
 end
 
 @testset "Truncated SVD with χ=$χ" begin
     l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r)
     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r)
-    # l_itersvd, g_itersvd = withgradient(
-    #     A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r
-    # )
+    l_itersvd, g_itersvd = withgradient(
+        A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r
+    )
 
-    @test l_oldsvd ≈ l_fullsvd 
-    # @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
-    # @test norm(g_tsvd[1] - g_itersvd[1]) / norm(g_tsvd[1]) < rtol
+    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
 end
 
 # @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin
diff --git a/test/runtests.jl b/test/runtests.jl
index af2023e1..33e2fc42 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,6 +10,9 @@ const GROUP = get(ENV, "GROUP", "All")
         @time @safetestset "Gradients" begin
             include("ctmrg/gradients.jl")
         end
+        @time @safetestset "Gradients" begin
+            include("ctmrg/svd_wrapper.jl")
+        end
     end
     if GROUP == "All" || GROUP == "MPS"
         @time @safetestset "VUMPS" begin

From 823f52ef331e761e5831a79d3a1b30882d6b9e5f Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Fri, 5 Jul 2024 12:18:30 +0200
Subject: [PATCH 09/27] Add IterSVD _tsvd! method and adjoint using
 KrylovKit.svdsolve adjoint

---
 src/utility/svd.jl | 131 +++++++++++++++++++++++++++++++--------------
 1 file changed, 92 insertions(+), 39 deletions(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index ebcc9784..6995fc94 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -8,53 +8,106 @@ using TensorKit: SectorDict
 end
 
 # Compute SVD data block-wise using KrylovKit algorithm
-function TensorKit._tsvd!(t, alg::IterSVD, trunc::TruncationScheme, p::Real=2)
-    # TODO
-end
+function _tsvd!(
+    t,
+    alg::IterSVD,
+    trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace},
+    p::Real=2,
+)
+    # early return
+    if isempty(blocksectors(t))
+        truncerr = zero(real(scalartype(t)))
+        return _empty_svdtensors(t)..., truncerr
+    end
+
+    Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg, trunc)
+    U, S, V = _create_svdtensors(t, Udata, Σdata, Vdata, spacetype(t)(dims))
+    truncerr = trunc isa NoTruncation ? abs(zero(scalartype(t))) : norm(U * S * V - t, p)
 
-# function TensorKit._compute_svddata!(t::TensorMap, alg::IterSVD)
-#     InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
-#     I = sectortype(t)
-#     A = storagetype(t)
-#     Udata = SectorDict{I,A}()
-#     Vdata = SectorDict{I,A}()
-#     dims = SectorDict{I,Int}()
-#     local Σdata
-#     for (c, b) in blocks(t)
-#         x₀ = randn(eltype(b), size(b, 1))
-#         Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, alg.howmany, :LR, alg.alg)
-#         if info.converged < alg.howmany  # Fall back to dense SVD if not properly converged
-#             U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
-#             Udata[c] = U
-#             Vdata[c] = V
-#         else
-#             Udata[c] = stack(lvecs)
-#             Vdata[c] = stack(rvecs)'
-#         end
-#         if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
-#             Σdata[c] = Σ
-#         else
-#             Σdata = SectorDict(c => Σ)
-#         end
-#         dims[c] = length(Σ)
-#     end
-#     return Udata, Σdata, Vdata, dims
-# end
+    return U, S, V, truncerr
+end
+function TensorKit._compute_svddata!(
+    t::TensorMap,
+    alg::IterSVD,
+    trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace},
+)
+    InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
+    I = sectortype(t)
+    A = storagetype(t)
+    Udata = SectorDict{I,A}()
+    Vdata = SectorDict{I,A}()
+    dims = SectorDict{I,Int}()
+    local Σdata
+    for (c, b) in blocks(t)
+        x₀ = randn(eltype(b), size(b, 1))
+        howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c)
+        Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
+        if info.converged < howmany  # Fall back to dense SVD if not properly converged
+            U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
+            Udata[c] = U
+            Vdata[c] = V
+        else
+            Udata[c] = stack(lvecs)
+            Vdata[c] = stack(rvecs)'
+        end
+        if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
+            Σdata[c] = Σ
+        else
+            Σdata = SectorDict(c => Σ)
+        end
+        dims[c] = length(Σ)
+    end
+    return Udata, Σdata, Vdata, dims
+end
 
+# IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block
 function ChainRulesCore.rrule(
-    ::typeof(TensorKit.tsvd),
+    ::typeof(TensorKit.tsvd!),
     t::AbstractTensorMap;
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
     alg::IterSVD=IterSVD(),
 )
-    # TODO: IterSVD adjoint utilizing KryloVKit svdsolve adjoint
-end
+    U, S, V, ϵ = tsvd(t; trunc, p, alg)
 
+    function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+        Δt = similar(t)
+        for (c, b) in blocks(Δt)
+            Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
+            ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c)
+            Sdc = view(Sc, diagind(Sc))
+            ΔSdc = (ΔSc isa AbstractZero) ? ΔSc : view(ΔSc, diagind(ΔSc))
+
+            lvecs = eachcol(Uc)
+            rvecs = eachrow(Vc)
+            minimal_info = KrylovKit.ConvergenceInfo(length(Sdc), nothing, nothing, -1, -1)  # Just supply converged to SVD pullback
+            xs, ys = compute_svdsolve_pullback_data(
+                ΔSdc,
+                eachcol(ΔUc),
+                eachrow(ΔVc),
+                Sdc,
+                lvecs,
+                rvecs,
+                minimal_info,
+                b,
+                :LR,
+                alg.alg,
+                alg.alg_rrule,
+            )
+            copyto!(b, construct∂f_svd(config, b, lvecs, rvecs, xs, ys))
+        end
+        return NoTangent(), Δt
+    end
+    function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+        return NoTangent(), ZeroTangent()
+    end
+
+    return (U, S, V, ϵ), tsvd!_itersvd_pullback
+end
 
 # Full SVD with old adjoint that doesn't account for truncation properly
-@kwdef struct OldSVD{A<:Union{FullSVD,IterSVD}}
-    alg::A = FullSVD()
+@kwdef struct OldSVD{A<:Union{TensorKit.SDD,TensorKit.SVD,IterSVD}}
+    alg::A = TensorKit.SVD()
     lorentz_broad::Float64 = 0.0
 end
 
@@ -65,7 +118,7 @@ end
 
 # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
-    ::typeof(TensorKit.tsvd),
+    ::typeof(TensorKit.tsvd!),
     t::AbstractTensorMap;
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
@@ -73,7 +126,7 @@ function ChainRulesCore.rrule(
 )
     U, S, V, ϵ = tsvd(t; trunc, p, alg)
 
-    function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd!_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         ∂t = similar(t)
         for (c, b) in blocks(∂t)
             copyto!(
@@ -92,7 +145,7 @@ function ChainRulesCore.rrule(
         return NoTangent(), ∂t, NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd_oldsvd_pullback
+    return (U, S, V, ϵ), tsvd!_oldsvd_pullback
 end
 
 function oldsvd_rev(

From a615b4a22744093e594acb842ae650669444b27f Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Fri, 5 Jul 2024 16:44:21 +0200
Subject: [PATCH 10/27] Add PEPSKit.tsvd wrapper, fix IterSVD adjoint

---
 src/algorithms/ctmrg.jl   |   2 +-
 src/utility/svd.jl        | 133 +++++++++++++++++++++-----------------
 test/ctmrg/svd_wrapper.jl |  23 +++----
 3 files changed, 88 insertions(+), 70 deletions(-)

diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 70cbe133..5b4d65dc 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -336,7 +336,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
                 alg.trscheme
             end
             @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6]
-            U, S, V, ϵ_local = tsvd(QQ; trunc=trscheme, alg=alg.svdalg)
+            U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svdalg; trunc=trscheme)
             ϵ = max(ϵ, ϵ_local / norm(S))
             # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better
 
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 6995fc94..1dae6534 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -1,4 +1,19 @@
-using TensorKit: SectorDict
+using TensorKit:
+    SectorDict,
+    _tsvd!,
+    _empty_svdtensors,
+    _compute_svddata!,
+    _create_svdtensors,
+    NoTruncation,
+    TruncationSpace
+CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
+
+# SVD wrapper for TensorKit.tsvd that dispatches on the alg type
+function PEPSKit.tsvd(
+    t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2
+)
+    return TensorKit.tsvd(t; alg, trunc, p)
+end
 
 # Wrapper around Krylov Kit's GKL iterative SVD solver
 @kwdef struct IterSVD
@@ -8,11 +23,8 @@ using TensorKit: SectorDict
 end
 
 # Compute SVD data block-wise using KrylovKit algorithm
-function _tsvd!(
-    t,
-    alg::IterSVD,
-    trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace},
-    p::Real=2,
+function TensorKit._tsvd!(
+    t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2
 )
     # early return
     if isempty(blocksectors(t))
@@ -27,9 +39,7 @@ function _tsvd!(
     return U, S, V, truncerr
 end
 function TensorKit._compute_svddata!(
-    t::TensorMap,
-    alg::IterSVD,
-    trunc::Union{TensorKit.NoTruncation,TensorKit.TruncationSpace},
+    t::TensorMap, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}
 )
     InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!)
     I = sectortype(t)
@@ -37,72 +47,83 @@ function TensorKit._compute_svddata!(
     Udata = SectorDict{I,A}()
     Vdata = SectorDict{I,A}()
     dims = SectorDict{I,Int}()
-    local Σdata
+    local Sdata
     for (c, b) in blocks(t)
         x₀ = randn(eltype(b), size(b, 1))
         howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c)
-        Σ, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
+        S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
         if info.converged < howmany  # Fall back to dense SVD if not properly converged
-            U, Σ, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
+            U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
             Udata[c] = U
             Vdata[c] = V
-        else
-            Udata[c] = stack(lvecs)
-            Vdata[c] = stack(rvecs)'
+        else  # Slice in case more values were converged than requested
+            Udata[c] = stack(lvecs[1:howmany])
+            Vdata[c] = stack(rvecs[1:howmany])'
+            S = S[1:howmany]
         end
-        if @isdefined Σdata # cannot easily infer the type of Σ, so use this construction
-            Σdata[c] = Σ
+        if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction
+            Sdata[c] = S
         else
-            Σdata = SectorDict(c => Σ)
+            Sdata = SectorDict(c => S)
         end
-        dims[c] = length(Σ)
+        dims[c] = length(S)
     end
-    return Udata, Σdata, Vdata, dims
+    return Udata, Sdata, Vdata, dims
 end
 
 # IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block
 function ChainRulesCore.rrule(
-    ::typeof(TensorKit.tsvd!),
-    t::AbstractTensorMap;
+    ::typeof(PEPSKit.tsvd),
+    t::AbstractTensorMap,
+    alg::IterSVD;
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-    alg::IterSVD=IterSVD(),
 )
-    U, S, V, ϵ = tsvd(t; trunc, p, alg)
+    U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
             ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c)
             Sdc = view(Sc, diagind(Sc))
-            ΔSdc = (ΔSc isa AbstractZero) ? ΔSc : view(ΔSc, diagind(ΔSc))
+            ΔSdc = ΔSc isa AbstractZero ? ΔSc : view(ΔSc, diagind(ΔSc))
 
-            lvecs = eachcol(Uc)
-            rvecs = eachrow(Vc)
+            n_vals = length(Sdc)
+            lvecs = Vector{Vector{scalartype(t)}}(eachcol(Uc))
+            rvecs = Vector{Vector{scalartype(t)}}(eachcol(Vc'))
             minimal_info = KrylovKit.ConvergenceInfo(length(Sdc), nothing, nothing, -1, -1)  # Just supply converged to SVD pullback
-            xs, ys = compute_svdsolve_pullback_data(
-                ΔSdc,
-                eachcol(ΔUc),
-                eachrow(ΔVc),
+
+            if ΔUc isa AbstractZero && ΔVc isa AbstractZero  # Handle ZeroTangent singular vectors
+                Δlvecs = fill(ZeroTangent(), n_vals)
+                Δrvecs = fill(ZeroTangent(), n_vals)
+            else
+                Δlvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔUc))
+                Δrvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔVc'))
+            end
+
+            xs, ys = CRCExt.compute_svdsolve_pullback_data(
+                ΔSc isa AbstractZero ? fill(zero(Sc[1]), n_vals) : ΔSdc,
+                Δlvecs,
+                Δrvecs,
                 Sdc,
                 lvecs,
                 rvecs,
                 minimal_info,
-                b,
+                block(t, c),
                 :LR,
                 alg.alg,
                 alg.alg_rrule,
             )
-            copyto!(b, construct∂f_svd(config, b, lvecs, rvecs, xs, ys))
+            copyto!(b, CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys))
         end
-        return NoTangent(), Δt
+        return NoTangent(), Δt, NoTangent()
     end
-    function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
-        return NoTangent(), ZeroTangent()
+    function tsvd_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+        return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd!_itersvd_pullback
+    return (U, S, V, ϵ), tsvd_itersvd_pullback
 end
 
 # Full SVD with old adjoint that doesn't account for truncation properly
@@ -113,39 +134,35 @@ end
 
 # Perform TensorKit.SVD in forward pass 
 function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2)
-    return TensorKit._tsvd(t, TensorKit.SVD(), trunc, p)
+    return _tsvd!(t, TensorKit.SVD(), trunc, p)
 end
 
 # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
-    ::typeof(TensorKit.tsvd!),
-    t::AbstractTensorMap;
+    ::typeof(PEPSKit.tsvd),
+    t::AbstractTensorMap,
+    alg::OldSVD;
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-    alg::OldSVD=OldSVD(),
 )
-    U, S, V, ϵ = tsvd(t; trunc, p, alg)
+    U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd!_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
-        ∂t = similar(t)
-        for (c, b) in blocks(∂t)
+    function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+        Δt = similar(t)
+        for (c, b) in blocks(Δt)
+            Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
+            ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c)
             copyto!(
-                b,
-                oldsvd_rev(
-                    block(U, c),
-                    block(S, c),
-                    block(V, c),
-                    block(ΔU, c),
-                    block(ΔS, c),
-                    block(ΔV, c);
-                    lorentz_broad=alg.lorentz_broad,
-                ),
+                b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broad=alg.lorentz_broad)
             )
         end
-        return NoTangent(), ∂t, NoTangent()
+        return NoTangent(), Δt, NoTangent()
+    end
+    function tsvd_oldsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+        return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd!_oldsvd_pullback
+    return (U, S, V, ϵ), tsvd_oldsvd_pullback
 end
 
 function oldsvd_rev(
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index ab0989df..c725a6b1 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -7,45 +7,46 @@ using ChainRulesCore, Zygote
 using PEPSKit
 
 # Gauge-invariant loss function
-function lossfun(A, alg, R=TensorMap(randn, space(A)); trunc=notrunc())
-    U, _, V, = tsvd(A; trunc, alg)
+function lossfun(A, alg, R=TensorMap(randn, space(A)), trunc=notrunc())
+    U, _, V, = PEPSKit.tsvd(A, alg; trunc)
     return real(dot(R, U * V))  # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n
 end
 
 m, n = 20, 30
 dtype = ComplexF64
 χ = 12
-trunc = truncdim(χ)
+trunc = truncspace(ℂ^χ)
 # lorentz_broad = 1e-12
-adjoint_tol = 1e-16
+adjoint_tol = 1e-14  # Don't make this too small, g_itersvd will be weird
 rtol = 1e-9
 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
 R = TensorMap(randn, space(r))
 
 @testset "Non-truncacted SVD" begin
-    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R), r)
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r)
     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r)
     l_itersvd, g_itersvd = withgradient(
-        A -> lossfun(A, IterSVD(; howmany=min(m, n), adjoint_tol), R), r
+        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, truncspace(ℂ^min(m, n))), r
     )
 
-    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol
     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
 end
 
 @testset "Truncated SVD with χ=$χ" begin
-    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, FullSVD(), R; trunc), r)
-    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R; trunc), r)
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R, trunc), r)
+    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R, trunc), r)
     l_itersvd, g_itersvd = withgradient(
-        A -> lossfun(A, IterSVD(; howmany=χ, adjoint_tol), R; trunc), r
+        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, trunc), r
     )
 
-    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
+    @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
 end
 
+# TODO: Add when Lorentzian broadening is implemented
 # @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin
 #     l_fullsvd, g_fullsvd = withgradient(
 #         A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r

From a815e41846e7aa80f4a9f69ed559afc675c9ed10 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Fri, 5 Jul 2024 16:48:37 +0200
Subject: [PATCH 11/27] Add TensorKit compat entry for softened tsvd type
 restrictions

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 98a70c11..95074716 100644
--- a/Project.toml
+++ b/Project.toml
@@ -28,7 +28,7 @@ MPSKit = "0.10"
 OptimKit = "0.3"
 Printf = "1"
 Statistics = "1"
-TensorKit = "0.12"
+TensorKit = "0.12.5"
 VectorInterface = "0.4"
 Zygote = "0.6"
 julia = "1.6"

From 447bfd83fece742475044c513ac8ceb91142e897 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Fri, 5 Jul 2024 17:40:23 +0200
Subject: [PATCH 12/27] Add ProjectorAlg and refactor all tests and examples

---
 examples/boundary_mps.jl  |  8 ++++--
 examples/heisenberg.jl    | 11 ++++----
 src/PEPSKit.jl            |  2 +-
 src/algorithms/ctmrg.jl   | 59 ++++++++++++++++++++++++++++-----------
 src/utility/svd.jl        | 14 ++++++++--
 test/boundarymps/vumps.jl |  8 ++++--
 test/ctmrg/gaugefix.jl    | 16 +++++------
 test/ctmrg/gradients.jl   |  5 ++--
 test/ctmrg/gradparts.jl   |  5 ++--
 test/ctmrg/svd_wrapper.jl |  2 +-
 test/heisenberg.jl        |  3 +-
 test/pwave.jl             |  5 ++--
 12 files changed, 89 insertions(+), 49 deletions(-)

diff --git a/examples/boundary_mps.jl b/examples/boundary_mps.jl
index abd52e17..558b0de2 100644
--- a/examples/boundary_mps.jl
+++ b/examples/boundary_mps.jl
@@ -33,7 +33,9 @@ N = abs(prod(expectation_value(mps, T)))
 
 # This can be compared to the result obtained using the CTMRG algorithm
 ctm = leading_boundary(
-    peps, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps; Venv=ComplexSpace(20))
+    peps,
+    CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
+    CTMRGEnv(peps; Venv=ComplexSpace(20)),
 )
 N´ = abs(norm(peps, ctm))
 
@@ -56,7 +58,9 @@ mps2, envs2, ϵ = leading_boundary(mps2, T2, VUMPS())
 N2 = abs(prod(expectation_value(mps2, T2)))
 
 ctm2 = leading_boundary(
-    peps2, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps2; Venv=ComplexSpace(20))
+    peps2,
+    CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
+    CTMRGEnv(peps2; Venv=ComplexSpace(20)),
 )
 N2´ = abs(norm(peps2, ctm2))
 
diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl
index 0cfd8c06..65cf65e5 100644
--- a/examples/heisenberg.jl
+++ b/examples/heisenberg.jl
@@ -11,9 +11,10 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1)
 # Parameters
 χbond = 2
 χenv = 20
-ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1)
-alg = PEPSOptimize(;
-    boundary_alg=ctmalg,
+projector_alg = ProjectorAlg(; trscheme=truncdim(χenv))
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg)
+opt_alg = PEPSOptimize(;
+    boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
     gradient_alg=GMRES(; tol=1e-6, maxiter=100),
     reuse_env=true,
@@ -27,6 +28,6 @@ alg = PEPSOptimize(;
 # E/N = −0.6694421, which is a QMC estimate from https://arxiv.org/abs/1101.3281.
 # Of course there is a noticable bias for small χbond and χenv.
 ψ₀ = InfinitePEPS(2, χbond)
-env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctmalg)
-result = fixedpoint(ψ₀, H, alg, env₀)
+env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctm_alg)
+result = fixedpoint(ψ₀, H, opt_alg, env₀)
 @show result.E
diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index 1ee02e1f..b10f10c8 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -59,7 +59,7 @@ module Defaults
 end
 
 export FullSVD, IterSVD, OldSVD
-export CTMRG, CTMRGEnv
+export ProjectorAlg, CTMRG, CTMRGEnv
 export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor
 export expectation_value, costfun
 export leading_boundary
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 5b4d65dc..46ed8186 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -1,25 +1,46 @@
+# TODO: add option for different projector styles (half-infinite, full-infinite, etc.)
+"""
+    struct ProjectorAlg{S}(; svd_alg = TensorKit.SVD(), trscheme = TensorKit.notrunc(),
+                           fixedspace = false, verbosity = 0)
+
+Algorithm struct collecting all projector related parameters. The truncation scheme has to be
+a `TensorKit.TruncationScheme`, and some SVD algorithms might have further restrictions on what
+kind of truncation scheme can be used. If `fixedspace` is true, the truncation scheme is set to
+`truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding
+environment direction/unit cell entry.
+"""
+@kwdef struct ProjectorAlg{S}
+    svd_alg::S = TensorKit.SVD()
+    trscheme::TruncationScheme = TensorKit.notrunc()
+    fixedspace::Bool = false
+    verbosity::Int = 0
+end
+
+function fix_spaces(alg::ProjectorAlg)
+    return ProjectorAlg(;
+        svd_alg=alg.svd_alg, trscheme=alg.trscheme, fixedspace=true, verbosity=alg.verbosity
+    )
+end
+
 # TODO: add abstract Algorithm type?
 """
-    struct CTMRG(; trscheme = TensorKit.notrunc(), tol = Defaults.ctmrg_tol,
-                 maxiter = Defaults.ctmrg_maxiter, miniter = Defaults.ctmrg_miniter,
-                 verbosity = 0, fixedspace = false)
+    struct CTMRG(; tol = Defaults.ctmrg_tol, maxiter = Defaults.ctmrg_maxiter,
+                 miniter = Defaults.ctmrg_miniter, verbosity = 0,
+                 projector_alg = ProjectorAlg())
 
 Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS.
-The projector bond dimensions are set via `trscheme` which controls the truncation
-properties inside of `TensorKit.tsvd`. Each CTMRG run is converged up to `tol`
-where the singular value convergence of the corners as well as the norm is checked.
-The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`.
-Different levels of output information are printed depending on `verbosity` (0, 1 or 2).
-Regardless of the truncation scheme, the space can be kept fixed with `fixedspace`.
+Each CTMRG run is converged up to `tol` where the singular value convergence of the
+corners as well as the norm is checked. The maximal and minimal number of CTMRG iterations
+is set with `maxiter` and `miniter`. Different levels of output information are printed
+depending on `verbosity` (0, 1 or 2). All projector related properties are set using the
+`ProjectorAlg` struct.
 """
-@kwdef struct CTMRG{S}
+@kwdef struct CTMRG
     tol::Float64 = Defaults.ctmrg_tol
     maxiter::Int = Defaults.ctmrg_maxiter
     miniter::Int = Defaults.ctmrg_miniter
     verbosity::Int = 0
-    svdalg::S = TensorKit.SVD()
-    trscheme::TruncationScheme = TensorKit.notrunc()
-    fixedspace::Bool = false
+    projector_alg::ProjectorAlg = ProjectorAlg()
 end
 
 """
@@ -89,7 +110,11 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG)
 
     # Do one final iteration that does not change the spaces
     alg_fixed = CTMRG(;
-        alg.trscheme, alg.tol, alg.maxiter, alg.miniter, alg.verbosity, fixedspace=true
+        tol=alg.tol,
+        maxiter=alg.maxiter,
+        miniter=alg.miniter,
+        verbosity=alg.verbosity,
+        projector_alg=fix_spaces(alg.projector_alg),
     )
     env′, = ctmrg_iter(state, env, alg_fixed)
     envfix = gauge_fix(env, env′)
@@ -285,7 +310,7 @@ function ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
     ϵ = 0.0
 
     for _ in 1:4
-        env, _, _, ϵ₀ = left_move(state, env, alg)
+        env, _, _, ϵ₀ = left_move(state, env, alg.projector_alg)
         state = rotate_north(state, EAST)
         env = rotate_north(env, EAST)
         ϵ = max(ϵ, ϵ₀)
@@ -300,7 +325,7 @@ end
 Grow, project and renormalize the environment `env` in west direction.
 Return the updated environment as well as the projectors and truncation error.
 """
-function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
+function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T}
     corners::typeof(env.corners) = copy(env.corners)
     edges::typeof(env.edges) = copy(env.edges)
     ϵ = 0.0
@@ -336,7 +361,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T}
                 alg.trscheme
             end
             @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6]
-            U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svdalg; trunc=trscheme)
+            U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svd_alg; trunc=trscheme)
             ϵ = max(ϵ, ϵ_local / norm(S))
             # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better
 
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 1dae6534..23473bef 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -8,7 +8,14 @@ using TensorKit:
     TruncationSpace
 CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
 
-# SVD wrapper for TensorKit.tsvd that dispatches on the alg type
+"""
+    PEPSKit.tsvd(t::AbstractTensorMap, alg; trunc=notrunc(), p=2)
+
+Wrapper around `TensorKit.tsvd` which dispatches on the `alg` argument.
+This is needed since a custom adjoint for `PEPSKit.tsvd` may be defined,
+depending on the algorithm. E.g., for `IterSVD` the adjoint for a truncated
+SVD from `KrylovKit.svdsolve` is used.
+"""
 function PEPSKit.tsvd(
     t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2
 )
@@ -115,7 +122,10 @@ function ChainRulesCore.rrule(
                 alg.alg,
                 alg.alg_rrule,
             )
-            copyto!(b, CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys))
+            copyto!(
+                b,
+                CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys),
+            )
         end
         return NoTangent(), Δt, NoTangent()
     end
diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl
index 260f40b3..f5d10216 100644
--- a/test/boundarymps/vumps.jl
+++ b/test/boundarymps/vumps.jl
@@ -17,7 +17,9 @@ Random.seed!(29384293742893)
     N = abs(sum(expectation_value(mps, T)))
 
     ctm = leading_boundary(
-        CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true)
+        CTMRGEnv(psi; Venv=ComplexSpace(20)),
+        psi,
+        CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
     )
     N´ = abs(norm(psi, ctm))
 
@@ -33,7 +35,9 @@ end
     N = abs(prod(expectation_value(mps, T)))
 
     ctm = leading_boundary(
-        CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true)
+        CTMRGEnv(psi; Venv=ComplexSpace(20)),
+        psi,
+        CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
     )
     N´ = abs(norm(psi, ctm))
 
diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl
index 45bee5d5..b5a7d8e6 100644
--- a/test/ctmrg/gaugefix.jl
+++ b/test/ctmrg/gaugefix.jl
@@ -3,7 +3,7 @@ using Random
 using PEPSKit
 using TensorKit
 
-using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence
+using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence, fix_space
 
 scalartypes = [Float64, ComplexF64]
 unitcells = [(1, 1), (2, 2), (3, 2)]
@@ -45,10 +45,9 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    alg = CTMRG(;
-        trscheme=truncdim(dim(ctm_space)), tol=1e-10, miniter=4, maxiter=400, verbosity
-    )
-    alg_fixed = CTMRG(; trscheme=truncdim(dim(ctm_space)), verbosity, fixedspace=true)
+    projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space)))
+    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg)
+    alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg))
 
     ctm = leading_boundary(ctm, psi, alg)
     ctm2, = ctmrg_iter(psi, ctm, alg_fixed)
@@ -70,10 +69,9 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    alg = CTMRG(;
-        trscheme=truncspace(ctm_space), tol=1e-10, miniter=4, maxiter=400, verbosity
-    )
-    alg_fixed = CTMRG(; trscheme=truncspace(ctm_space), verbosity, fixedspace=true)
+    projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space)))
+    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg)
+    alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg))
 
     ctm = leading_boundary(ctm, psi, alg)
     ctm2, = ctmrg_iter(psi, ctm, alg_fixed)
diff --git a/test/ctmrg/gradients.jl b/test/ctmrg/gradients.jl
index 2c9b86f9..7ae40c3e 100644
--- a/test/ctmrg/gradients.jl
+++ b/test/ctmrg/gradients.jl
@@ -17,9 +17,8 @@ models = [square_lattice_heisenberg(), square_lattice_pwave()]
 names = ["Heisenberg", "p-wave superconductor"]
 Random.seed!(42039482030)
 tol = 1e-8
-boundary_alg = CTMRG(;
-    trscheme=truncdim(χenv), tol=tol, miniter=4, maxiter=100, fixedspace=true, verbosity=0
-)
+projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
+boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg)
 gradmodes = [
     nothing, GeomSum(; tol), ManualIter(; tol), KrylovKit.GMRES(; tol=tol, maxiter=10)
 ]
diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl
index c21a2420..380bfb9e 100644
--- a/test/ctmrg/gradparts.jl
+++ b/test/ctmrg/gradparts.jl
@@ -30,9 +30,8 @@ Vspaces = [ComplexSpace(χbond), Vect[FermionParity](0 => χbond / 2, 1 => χbon
 Espaces = [ComplexSpace(χenv), Vect[FermionParity](0 => χenv / 2, 1 => χenv / 2)]
 functions = [left_move, ctmrg_iter, leading_boundary]
 tol = 1e-8
-boundary_alg = CTMRG(;
-    trscheme=truncdim(χenv), tol=tol, miniter=4, maxiter=100, fixedspace=true, verbosity=0
-)
+projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
+boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg)
 
 ## Gauge invariant function of the environment
 # --------------------------------------------
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index c725a6b1..bae40eb8 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -26,7 +26,7 @@ R = TensorMap(randn, space(r))
     l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r)
     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r)
     l_itersvd, g_itersvd = withgradient(
-        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, truncspace(ℂ^min(m, n))), r
+        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R), r
     )
 
     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
diff --git a/test/heisenberg.jl b/test/heisenberg.jl
index 35e12ef7..2f569b9d 100644
--- a/test/heisenberg.jl
+++ b/test/heisenberg.jl
@@ -9,7 +9,8 @@ using OptimKit
 H = square_lattice_heisenberg()
 χbond = 2
 χenv = 16
-ctm_alg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1)
+projector_alg = ProjectorAlg(; trscheme=truncdim(χenv))
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg)
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-3, verbosity=2),
diff --git a/test/pwave.jl b/test/pwave.jl
index 4ebb9c85..afde9514 100644
--- a/test/pwave.jl
+++ b/test/pwave.jl
@@ -9,9 +9,8 @@ using OptimKit
 H = square_lattice_pwave()
 χbond = 2
 χenv = 16
-ctm_alg = CTMRG(;
-    trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=50, fixedspace=true, verbosity=1
-)
+projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1, projector_alg)
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2),

From bbd132d79f44a692af719d1bf649c2dadc49df1f Mon Sep 17 00:00:00 2001
From: Lukas <37111893+lkdvos@users.noreply.github.com>
Date: Fri, 5 Jul 2024 18:04:08 +0200
Subject: [PATCH 13/27] Update MPSKit compat

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 95074716..e913be9f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,7 +24,7 @@ ChainRulesCore = "1.0"
 Compat = "3.46, 4.2"
 KrylovKit = "0.8"
 LinearAlgebra = "1"
-MPSKit = "0.10"
+MPSKit = "0.11"
 OptimKit = "0.3"
 Printf = "1"
 Statistics = "1"

From 0c03cd43ad924468cac7024159b0452fb68a2e59 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 8 Jul 2024 12:12:08 +0200
Subject: [PATCH 14/27] Replace tsvd with tsvd!, add views to IterSVD adjoint

---
 Project.toml            |  2 +-
 src/algorithms/ctmrg.jl | 11 ++++++++++-
 src/utility/svd.jl      | 13 +++++++------
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/Project.toml b/Project.toml
index e913be9f..ef730624 100644
--- a/Project.toml
+++ b/Project.toml
@@ -31,7 +31,7 @@ Statistics = "1"
 TensorKit = "0.12.5"
 VectorInterface = "0.4"
 Zygote = "0.6"
-julia = "1.6"
+julia = "1.8"
 
 [extras]
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 46ed8186..97b8bd26 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -1,3 +1,12 @@
+"""
+    FixedSpaceTruncation <: TensorKit.TruncationScheme
+
+CTMRG specific truncation scheme for `tsvd` which keeps the bond space on which the SVD
+is performed fixed. Since different environment directions and unit cell entries might
+have different spaces, this truncation style is different from `TruncationSpace`.
+"""
+struct FixedSpaceTruncation <: TensorKit.TruncationScheme end 
+
 # TODO: add option for different projector styles (half-infinite, full-infinite, etc.)
 """
     struct ProjectorAlg{S}(; svd_alg = TensorKit.SVD(), trscheme = TensorKit.notrunc(),
@@ -361,7 +370,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T}
                 alg.trscheme
             end
             @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6]
-            U, S, V, ϵ_local = PEPSKit.tsvd(QQ, alg.svd_alg; trunc=trscheme)
+            U, S, V, ϵ_local = PEPSKit.tsvd!(QQ, alg.svd_alg; trunc=trscheme)
             ϵ = max(ϵ, ϵ_local / norm(S))
             # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better
 
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 23473bef..6055cde4 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -16,10 +16,11 @@ This is needed since a custom adjoint for `PEPSKit.tsvd` may be defined,
 depending on the algorithm. E.g., for `IterSVD` the adjoint for a truncated
 SVD from `KrylovKit.svdsolve` is used.
 """
-function PEPSKit.tsvd(
+PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...)
+function PEPSKit.tsvd!(
     t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2
 )
-    return TensorKit.tsvd(t; alg, trunc, p)
+    return TensorKit.tsvd!(t; alg, trunc, p)
 end
 
 # Wrapper around Krylov Kit's GKL iterative SVD solver
@@ -64,9 +65,9 @@ function TensorKit._compute_svddata!(
             Udata[c] = U
             Vdata[c] = V
         else  # Slice in case more values were converged than requested
-            Udata[c] = stack(lvecs[1:howmany])
-            Vdata[c] = stack(rvecs[1:howmany])'
-            S = S[1:howmany]
+            Udata[c] = stack(view(lvecs, 1:howmany))
+            Vdata[c] = stack(view(rvecs, 1:howmany))'
+            S = @view S[1:howmany]
         end
         if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction
             Sdata[c] = S
@@ -80,7 +81,7 @@ end
 
 # IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block
 function ChainRulesCore.rrule(
-    ::typeof(PEPSKit.tsvd),
+    ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
     alg::IterSVD;
     trunc::TruncationScheme=notrunc(),

From 14f0065b4f61ca77a4c21d0325ef0e005f5193c2 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 8 Jul 2024 15:12:07 +0200
Subject: [PATCH 15/27] Improve IterSVD allocation, implement CTMRG convenience
 constructor, update tests, implement FixedSpaceTruncation

---
 examples/boundary_mps.jl  | 12 ++-------
 examples/heisenberg.jl    |  3 +--
 src/PEPSKit.jl            |  2 +-
 src/algorithms/ctmrg.jl   | 57 +++++++++++++++++++--------------------
 src/utility/svd.jl        |  2 +-
 test/boundarymps/vumps.jl | 12 ++-------
 test/ctmrg/gaugefix.jl    | 12 ++++-----
 test/ctmrg/gradients.jl   |  3 +--
 test/ctmrg/gradparts.jl   |  3 +--
 test/heisenberg.jl        |  3 +--
 test/pwave.jl             |  3 +--
 11 files changed, 44 insertions(+), 68 deletions(-)

diff --git a/examples/boundary_mps.jl b/examples/boundary_mps.jl
index 558b0de2..8893f63e 100644
--- a/examples/boundary_mps.jl
+++ b/examples/boundary_mps.jl
@@ -32,11 +32,7 @@ mps, envs, ϵ = leading_boundary(mps, T, VUMPS())
 N = abs(prod(expectation_value(mps, T)))
 
 # This can be compared to the result obtained using the CTMRG algorithm
-ctm = leading_boundary(
-    peps,
-    CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
-    CTMRGEnv(peps; Venv=ComplexSpace(20)),
-)
+ctm = leading_boundary(peps, CTMRG(; verbosity=1), CTMRGEnv(peps; Venv=ComplexSpace(20)))
 N´ = abs(norm(peps, ctm))
 
 @show abs(N - N´) / N
@@ -57,11 +53,7 @@ mps2 = PEPSKit.initializeMPS(T2, fill(ComplexSpace(20), 2, 2))
 mps2, envs2, ϵ = leading_boundary(mps2, T2, VUMPS())
 N2 = abs(prod(expectation_value(mps2, T2)))
 
-ctm2 = leading_boundary(
-    peps2,
-    CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
-    CTMRGEnv(peps2; Venv=ComplexSpace(20)),
-)
+ctm2 = leading_boundary(peps2, CTMRG(; verbosity=1), CTMRGEnv(peps2; Venv=ComplexSpace(20)))
 N2´ = abs(norm(peps2, ctm2))
 
 @show abs(N2 - N2´) / N2
diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl
index 65cf65e5..a43f313c 100644
--- a/examples/heisenberg.jl
+++ b/examples/heisenberg.jl
@@ -11,8 +11,7 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1)
 # Parameters
 χbond = 2
 χenv = 20
-projector_alg = ProjectorAlg(; trscheme=truncdim(χenv))
-ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv))
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index b10f10c8..5d341885 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -59,7 +59,7 @@ module Defaults
 end
 
 export FullSVD, IterSVD, OldSVD
-export ProjectorAlg, CTMRG, CTMRGEnv
+export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv
 export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor
 export expectation_value, costfun
 export leading_boundary
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 97b8bd26..1b424790 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -5,7 +5,7 @@ CTMRG specific truncation scheme for `tsvd` which keeps the bond space on which
 is performed fixed. Since different environment directions and unit cell entries might
 have different spaces, this truncation style is different from `TruncationSpace`.
 """
-struct FixedSpaceTruncation <: TensorKit.TruncationScheme end 
+struct FixedSpaceTruncation <: TensorKit.TruncationScheme end
 
 # TODO: add option for different projector styles (half-infinite, full-infinite, etc.)
 """
@@ -18,38 +18,43 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s
 `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding
 environment direction/unit cell entry.
 """
-@kwdef struct ProjectorAlg{S}
+@kwdef struct ProjectorAlg{S,T}
     svd_alg::S = TensorKit.SVD()
-    trscheme::TruncationScheme = TensorKit.notrunc()
-    fixedspace::Bool = false
+    trscheme::T = FixedSpaceTruncation()
     verbosity::Int = 0
 end
 
-function fix_spaces(alg::ProjectorAlg)
-    return ProjectorAlg(;
-        svd_alg=alg.svd_alg, trscheme=alg.trscheme, fixedspace=true, verbosity=alg.verbosity
-    )
-end
-
 # TODO: add abstract Algorithm type?
 """
-    struct CTMRG(; tol = Defaults.ctmrg_tol, maxiter = Defaults.ctmrg_maxiter,
-                 miniter = Defaults.ctmrg_miniter, verbosity = 0,
-                 projector_alg = ProjectorAlg())
+    CTMRG(; tol=Defaults.ctmrg_tol, maxiter=Defaults.ctmrg_maxiter,
+          miniter=Defaults.ctmrg_miniter, verbosity=0,
+          svd_alg=TensorKit.SVD(), trscheme=FixedSpaceTruncation())
 
 Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS.
 Each CTMRG run is converged up to `tol` where the singular value convergence of the
 corners as well as the norm is checked. The maximal and minimal number of CTMRG iterations
 is set with `maxiter` and `miniter`. Different levels of output information are printed
-depending on `verbosity` (0, 1 or 2). All projector related properties are set using the
-`ProjectorAlg` struct.
+depending on `verbosity` (0, 1 or 2). The projectors are computed from `svd_alg` SVDs
+where the truncation scheme is set via `trscheme`.
 """
-@kwdef struct CTMRG
-    tol::Float64 = Defaults.ctmrg_tol
-    maxiter::Int = Defaults.ctmrg_maxiter
-    miniter::Int = Defaults.ctmrg_miniter
-    verbosity::Int = 0
-    projector_alg::ProjectorAlg = ProjectorAlg()
+struct CTMRG
+    tol::Float64
+    maxiter::Int
+    miniter::Int
+    verbosity::Int
+    projector_alg::ProjectorAlg
+end
+function CTMRG(;
+    tol=Defaults.ctmrg_tol,
+    maxiter=Defaults.ctmrg_maxiter,
+    miniter=Defaults.ctmrg_miniter,
+    verbosity=0,
+    svd_alg=TensorKit.SVD(),
+    trscheme=FixedSpaceTruncation(),
+)
+    return CTMRG(
+        tol, maxiter, miniter, verbosity, ProjectorAlg(; svd_alg, trscheme, verbosity)
+    )
 end
 
 """
@@ -118,13 +123,7 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG)
     end
 
     # Do one final iteration that does not change the spaces
-    alg_fixed = CTMRG(;
-        tol=alg.tol,
-        maxiter=alg.maxiter,
-        miniter=alg.miniter,
-        verbosity=alg.verbosity,
-        projector_alg=fix_spaces(alg.projector_alg),
-    )
+    alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation()
     env′, = ctmrg_iter(state, env, alg_fixed)
     envfix = gauge_fix(env, env′)
     check_elementwise_convergence(env, envfix; atol=alg.tol^(1 / 2)) ||
@@ -364,7 +363,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T}
             )
 
             # SVD half-infinite environment
-            trscheme = if alg.fixedspace == true
+            trscheme = if alg.trscheme isa FixedSpaceTruncation
                 truncspace(space(env.edges[WEST, row, cnext], 1))
             else
                 alg.trscheme
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 6055cde4..1c24b6f5 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -66,7 +66,7 @@ function TensorKit._compute_svddata!(
             Vdata[c] = V
         else  # Slice in case more values were converged than requested
             Udata[c] = stack(view(lvecs, 1:howmany))
-            Vdata[c] = stack(view(rvecs, 1:howmany))'
+            Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1)
             S = @view S[1:howmany]
         end
         if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction
diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl
index f5d10216..f8709028 100644
--- a/test/boundarymps/vumps.jl
+++ b/test/boundarymps/vumps.jl
@@ -16,11 +16,7 @@ Random.seed!(29384293742893)
     mps, envs, ϵ = leading_boundary(mps, T, VUMPS())
     N = abs(sum(expectation_value(mps, T)))
 
-    ctm = leading_boundary(
-        CTMRGEnv(psi; Venv=ComplexSpace(20)),
-        psi,
-        CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
-    )
+    ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1))
     N´ = abs(norm(psi, ctm))
 
     @test N ≈ N´ atol = 1e-3
@@ -34,11 +30,7 @@ end
     mps, envs, ϵ = leading_boundary(mps, T, VUMPS())
     N = abs(prod(expectation_value(mps, T)))
 
-    ctm = leading_boundary(
-        CTMRGEnv(psi; Venv=ComplexSpace(20)),
-        psi,
-        CTMRG(; verbosity=1, projector_alg=ProjectorAlg(; fixedspace=true)),
-    )
+    ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1))
     N´ = abs(norm(psi, ctm))
 
     @test N ≈ N´ rtol = 1e-3
diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl
index b5a7d8e6..268e579e 100644
--- a/test/ctmrg/gaugefix.jl
+++ b/test/ctmrg/gaugefix.jl
@@ -3,7 +3,7 @@ using Random
 using PEPSKit
 using TensorKit
 
-using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence, fix_space
+using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence
 
 scalartypes = [Float64, ComplexF64]
 unitcells = [(1, 1), (2, 2), (3, 2)]
@@ -45,9 +45,8 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space)))
-    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg)
-    alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg))
+    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)))
+    alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation()
 
     ctm = leading_boundary(ctm, psi, alg)
     ctm2, = ctmrg_iter(psi, ctm, alg_fixed)
@@ -69,9 +68,8 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    projector_alg = ProjectorAlg(; trscheme=truncdim(dim(ctm_space)))
-    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, projector_alg)
-    alg_fixed = CTMRG(; verbosity, projector_alg=fix_spaces(projector_alg))
+    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)))
+    alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation()
 
     ctm = leading_boundary(ctm, psi, alg)
     ctm2, = ctmrg_iter(psi, ctm, alg_fixed)
diff --git a/test/ctmrg/gradients.jl b/test/ctmrg/gradients.jl
index 7ae40c3e..d7a842d6 100644
--- a/test/ctmrg/gradients.jl
+++ b/test/ctmrg/gradients.jl
@@ -17,8 +17,7 @@ models = [square_lattice_heisenberg(), square_lattice_pwave()]
 names = ["Heisenberg", "p-wave superconductor"]
 Random.seed!(42039482030)
 tol = 1e-8
-projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
-boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg)
+boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0)
 gradmodes = [
     nothing, GeomSum(; tol), ManualIter(; tol), KrylovKit.GMRES(; tol=tol, maxiter=10)
 ]
diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl
index 380bfb9e..e1720db8 100644
--- a/test/ctmrg/gradparts.jl
+++ b/test/ctmrg/gradparts.jl
@@ -30,8 +30,7 @@ Vspaces = [ComplexSpace(χbond), Vect[FermionParity](0 => χbond / 2, 1 => χbon
 Espaces = [ComplexSpace(χenv), Vect[FermionParity](0 => χenv / 2, 1 => χenv / 2)]
 functions = [left_move, ctmrg_iter, leading_boundary]
 tol = 1e-8
-projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
-boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0, projector_alg)
+boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0)
 
 ## Gauge invariant function of the environment
 # --------------------------------------------
diff --git a/test/heisenberg.jl b/test/heisenberg.jl
index 2f569b9d..0891fea2 100644
--- a/test/heisenberg.jl
+++ b/test/heisenberg.jl
@@ -9,8 +9,7 @@ using OptimKit
 H = square_lattice_heisenberg()
 χbond = 2
 χenv = 16
-projector_alg = ProjectorAlg(; trscheme=truncdim(χenv))
-ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, projector_alg)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv))
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-3, verbosity=2),
diff --git a/test/pwave.jl b/test/pwave.jl
index afde9514..a94c1d0d 100644
--- a/test/pwave.jl
+++ b/test/pwave.jl
@@ -9,8 +9,7 @@ using OptimKit
 H = square_lattice_pwave()
 χbond = 2
 χenv = 16
-projector_alg = ProjectorAlg(; trscheme=truncdim(χenv), fixedspace=true)
-ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1, projector_alg)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1)
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2),

From 9b2c4b7531181fbe980ad22fa47842a44f178525 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 8 Jul 2024 16:20:16 +0200
Subject: [PATCH 16/27] Fix tests

---
 test/boundarymps/vumps.jl | 2 +-
 test/ctmrg/gaugefix.jl    | 1 +
 test/ctmrg/gradparts.jl   | 6 ++----
 test/pwave.jl             | 4 ++--
 test/runtests.jl          | 2 +-
 5 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl
index f8709028..fa4b434d 100644
--- a/test/boundarymps/vumps.jl
+++ b/test/boundarymps/vumps.jl
@@ -33,7 +33,7 @@ end
     ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1))
     N´ = abs(norm(psi, ctm))
 
-    @test N ≈ N´ rtol = 1e-3
+    @test N ≈ N´ rtol = 1e-2
 end
 
 @testset "PEPO runthrough" begin
diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl
index 268e579e..195df266 100644
--- a/test/ctmrg/gaugefix.jl
+++ b/test/ctmrg/gaugefix.jl
@@ -2,6 +2,7 @@ using Test
 using Random
 using PEPSKit
 using TensorKit
+using Accessors
 
 using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence
 
diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl
index e1720db8..9c2e3442 100644
--- a/test/ctmrg/gradparts.jl
+++ b/test/ctmrg/gradparts.jl
@@ -49,10 +49,8 @@ end
 
 ## Tests
 # ------
-@testset "Reverse rules for composite parts of the CTMRG fixed point with spacetype $(Vspaces[i])" for i in
-                                                                                                       eachindex(
-    Pspaces
-)
+title = "Reverse rules for composite parts of the CTMRG fixed point with spacetype"
+@testset title * "$(Vspaces[i])" for i in eachindex(Pspaces)
     psi = InfinitePEPS(Pspaces[i], Vspaces[i], Vspaces[i])
     env = CTMRGEnv(psi; Venv=Espaces[i])
 
diff --git a/test/pwave.jl b/test/pwave.jl
index a94c1d0d..e9acf179 100644
--- a/test/pwave.jl
+++ b/test/pwave.jl
@@ -9,11 +9,11 @@ using OptimKit
 H = square_lattice_pwave()
 χbond = 2
 χenv = 16
-ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=50, verbosity=1)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1)
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2),
-    gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50, verbosity=2),
+    gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50),
     reuse_env=true,
     verbosity=2,
 )
diff --git a/test/runtests.jl b/test/runtests.jl
index 33e2fc42..cd7d81d6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,7 +10,7 @@ const GROUP = get(ENV, "GROUP", "All")
         @time @safetestset "Gradients" begin
             include("ctmrg/gradients.jl")
         end
-        @time @safetestset "Gradients" begin
+        @time @safetestset "SVD wrapper" begin
             include("ctmrg/svd_wrapper.jl")
         end
     end

From 0c13d47a1c357ed6aae81385f686de4751044667 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 8 Jul 2024 17:03:04 +0200
Subject: [PATCH 17/27] Add block-wise dense fallback option

---
 src/utility/svd.jl | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 1c24b6f5..ded85014 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -26,13 +26,14 @@ end
 # Wrapper around Krylov Kit's GKL iterative SVD solver
 @kwdef struct IterSVD
     alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25)
+    fallback_threshold::Float64 = Inf
     lorentz_broad::Float64 = 0.0
     alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14)
 end
 
 # Compute SVD data block-wise using KrylovKit algorithm
 function TensorKit._tsvd!(
-    t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2
+    t, alg::Union{IterSVD}, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2
 )
     # early return
     if isempty(blocksectors(t))
@@ -59,16 +60,24 @@ function TensorKit._compute_svddata!(
     for (c, b) in blocks(t)
         x₀ = randn(eltype(b), size(b, 1))
         howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c)
-        S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
-        if info.converged < howmany  # Fall back to dense SVD if not properly converged
+
+        if howmany / minimum(size(b)) > alg.fallback_threshold  # Use dense SVD for small blocks
             U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
-            Udata[c] = U
-            Vdata[c] = V
-        else  # Slice in case more values were converged than requested
-            Udata[c] = stack(view(lvecs, 1:howmany))
-            Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1)
-            S = @view S[1:howmany]
+            Udata[c] = @view U[:, 1:howmany]
+            Vdata[c] = @view V[1:howmany, :]
+        else
+            S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
+            if info.converged < howmany  # Fall back to dense SVD if not properly converged
+                U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
+                Udata[c] = @view U[:, 1:howmany]
+                Vdata[c] = @view V[1:howmany, :]
+            else  # Slice in case more values were converged than requested
+                Udata[c] = stack(view(lvecs, 1:howmany))
+                Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1)
+            end
         end
+
+        S = @view S[1:howmany]
         if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction
             Sdata[c] = S
         else

From 538652d2074255eb8d656904a8f255b80b4a1476 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Mon, 8 Jul 2024 18:40:38 +0200
Subject: [PATCH 18/27] Add SVDrrule wrapper, add separate adjoint structs and
 rrules, update SVD test, add docs

---
 src/PEPSKit.jl            |   2 +-
 src/algorithms/ctmrg.jl   |   6 +-
 src/utility/svd.jl        | 119 +++++++++++++++++++++++++++-----------
 test/ctmrg/svd_wrapper.jl |  36 ++++++------
 4 files changed, 108 insertions(+), 55 deletions(-)

diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index 5d341885..a95366b9 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -58,7 +58,7 @@ module Defaults
     const fpgrad_tol = 1e-6
 end
 
-export FullSVD, IterSVD, OldSVD
+export SVDrrule, IterSVD, OldSVD, CompleteSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint
 export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv
 export NLocalOperator, AnisotropicNNOperator, OnSite, NearestNeighbor
 export expectation_value, costfun
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 1b424790..3cd745df 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -18,8 +18,8 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s
 `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding
 environment direction/unit cell entry.
 """
-@kwdef struct ProjectorAlg{S,T}
-    svd_alg::S = TensorKit.SVD()
+@kwdef struct ProjectorAlg{S<:SVDrrule,T}
+    svd_alg::S = SVDrrule()
     trscheme::T = FixedSpaceTruncation()
     verbosity::Int = 0
 end
@@ -49,7 +49,7 @@ function CTMRG(;
     maxiter=Defaults.ctmrg_maxiter,
     miniter=Defaults.ctmrg_miniter,
     verbosity=0,
-    svd_alg=TensorKit.SVD(),
+    svd_alg=SVDrrule(),
     trscheme=FixedSpaceTruncation(),
 )
     return CTMRG(
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index ded85014..9a2651ff 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -8,6 +8,16 @@ using TensorKit:
     TruncationSpace
 CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
 
+"""
+    struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = CompleteSVDAdjoint())
+
+Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`.
+"""
+@kwdef struct SVDrrule{S,R}
+    svd_alg::S = TensorKit.SVD()
+    rrule_alg::R = CompleteSVDAdjoint()  # TODO: should contain Lorentzian broadening eventually
+end  # Keep truncation algorithm separate to be able to specify CTMRG dependent information
+
 """
     PEPSKit.tsvd(t::AbstractTensorMap, alg; trunc=notrunc(), p=2)
 
@@ -18,22 +28,28 @@ SVD from `KrylovKit.svdsolve` is used.
 """
 PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...)
 function PEPSKit.tsvd!(
-    t::AbstractTensorMap, alg; trunc::TruncationScheme=notrunc(), p::Real=2
+    t::AbstractTensorMap, alg::SVDrrule; trunc::TruncationScheme=notrunc(), p::Real=2
 )
-    return TensorKit.tsvd!(t; alg, trunc, p)
+    return TensorKit.tsvd!(t; alg=alg.svd_alg, trunc, p)
 end
 
-# Wrapper around Krylov Kit's GKL iterative SVD solver
+"""
+    struct IterSVD(; alg = KrylovKit.GKL(), fallback_threshold = Inf)
+
+Iterative SVD solver based on KrylovKit's GKL algorithm, adapted to (symmmetric) tensors.
+The number of targeted singular values is set via the `TruncationSpace` in `ProjectorAlg`.
+In particular, this make it possible to specify the targeted singular values block-wise.
+In case the symmetry block is too small as compared to the number of singular values, or
+the iterative SVD didn't converge, the algorithm falls back to a dense SVD.
+"""
 @kwdef struct IterSVD
     alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25)
     fallback_threshold::Float64 = Inf
-    lorentz_broad::Float64 = 0.0
-    alg_rrule::Union{GMRES,BiCGStab,Arnoldi} = GMRES(; tol=1e-14)
 end
 
 # Compute SVD data block-wise using KrylovKit algorithm
 function TensorKit._tsvd!(
-    t, alg::Union{IterSVD}, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2
+    t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2
 )
     # early return
     if isempty(blocksectors(t))
@@ -63,14 +79,14 @@ function TensorKit._compute_svddata!(
 
         if howmany / minimum(size(b)) > alg.fallback_threshold  # Use dense SVD for small blocks
             U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
-            Udata[c] = @view U[:, 1:howmany]
-            Vdata[c] = @view V[1:howmany, :]
+            Udata[c] = U[:, 1:howmany]
+            Vdata[c] = V[1:howmany, :]
         else
             S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
             if info.converged < howmany  # Fall back to dense SVD if not properly converged
                 U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
-                Udata[c] = @view U[:, 1:howmany]
-                Vdata[c] = @view V[1:howmany, :]
+                Udata[c] = U[:, 1:howmany]
+                Vdata[c] = V[1:howmany, :]
             else  # Slice in case more values were converged than requested
                 Udata[c] = stack(view(lvecs, 1:howmany))
                 Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1)
@@ -88,17 +104,45 @@ function TensorKit._compute_svddata!(
     return Udata, Sdata, Vdata, dims
 end
 
-# IterSVD adjoint for tsvd! using KrylovKit.svdsolve adjoint machinery for each block
+"""
+    struct CompleteSVDAdjoint(; lorentz_broadening = 0.0)
+
+Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD.
+"""
+@kwdef struct CompleteSVDAdjoint
+    lorentz_broadening::Float64 = 0.0
+end
+
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
-    alg::IterSVD;
+    alg::SVDrrule{A,CompleteSVDAdjoint};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-)
+) where {A}
+    return rrule(TensorKit.tsvd!; trunc, p, alg=alg.svd_alg)
+end
+
+"""
+    struct SparseSVDAdjoint(; lorentz_broadening = 0.0)
+
+Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required.
+"""
+@kwdef struct SparseSVDAdjoint
+    alg::Union{GMRES,BiCGStab,Arnoldi} = GMRES()
+    lorentz_broadening::Float64 = 0.0
+end
+
+function ChainRulesCore.rrule(
+    ::typeof(PEPSKit.tsvd!),
+    t::AbstractTensorMap,
+    alg::SVDrrule{A,SparseSVDAdjoint};
+    trunc::TruncationScheme=notrunc(),
+    p::Real=2,
+) where {A}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
@@ -129,8 +173,8 @@ function ChainRulesCore.rrule(
                 minimal_info,
                 block(t, c),
                 :LR,
-                alg.alg,
-                alg.alg_rrule,
+                alg.svd_alg.alg,
+                alg.rrule_alg.alg,
             )
             copyto!(
                 b,
@@ -139,50 +183,57 @@ function ChainRulesCore.rrule(
         end
         return NoTangent(), Δt, NoTangent()
     end
-    function tsvd_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+    function tsvd_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
         return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd_itersvd_pullback
+    return (U, S, V, ϵ), tsvd_sparsesvd_pullback
 end
 
-# Full SVD with old adjoint that doesn't account for truncation properly
-@kwdef struct OldSVD{A<:Union{TensorKit.SDD,TensorKit.SVD,IterSVD}}
-    alg::A = TensorKit.SVD()
-    lorentz_broad::Float64 = 0.0
-end
+"""
+    struct NonTruncAdjoint(; lorentz_broadening = 0.0)
 
-# Perform TensorKit.SVD in forward pass 
-function TensorKit._tsvd!(t, ::OldSVD, trunc::TruncationScheme, p::Real=2)
-    return _tsvd!(t, TensorKit.SVD(), trunc, p)
+Old SVD adjoint that does not account for the truncated part of truncated SVDs.
+"""
+@kwdef struct NonTruncSVDAdjoint
+    lorentz_broadening::Float64 = 0.0
 end
 
 # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd),
     t::AbstractTensorMap,
-    alg::OldSVD;
+    alg::SVDrrule{A,NonTruncSVDAdjoint};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-)
+) where {A}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd_oldsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
             ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c)
             copyto!(
-                b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broad=alg.lorentz_broad)
+                b,
+                oldsvd_rev(
+                    Uc,
+                    Sc,
+                    Vc,
+                    ΔUc,
+                    ΔSc,
+                    ΔVc;
+                    lorentz_broadening=alg.rrule_alg.lorentz_broadening,
+                ),
             )
         end
         return NoTangent(), Δt, NoTangent()
     end
-    function tsvd_oldsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+    function tsvd_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
         return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd_oldsvd_pullback
+    return (U, S, V, ϵ), tsvd_nontruncsvd_pullback
 end
 
 function oldsvd_rev(
@@ -192,12 +243,12 @@ function oldsvd_rev(
     ΔU,
     ΔS,
     ΔV;
-    lorentz_broad=0,
+    lorentz_broadening=0,
     atol::Real=0,
     rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4),
 )
     tol = atol > 0 ? atol : rtol * S[1, 1]
-    F = _invert_S²(S, tol, lorentz_broad)  # Includes Lorentzian broadening
+    F = _invert_S²(S, tol, lorentz_broadening)  # Includes Lorentzian broadening
     S⁻¹ = pinv(S; atol=tol)
 
     # dS contribution
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index bae40eb8..0f3bd1da 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -16,18 +16,22 @@ m, n = 20, 30
 dtype = ComplexF64
 χ = 12
 trunc = truncspace(ℂ^χ)
-# lorentz_broad = 1e-12
-adjoint_tol = 1e-14  # Don't make this too small, g_itersvd will be weird
+# lorentz_broadening = 1e-12
 rtol = 1e-9
 r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
 R = TensorMap(randn, space(r))
 
+full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint())
+old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint())
+iter_alg = SVDrrule(;  # Don't make adjoint tolerance too small, g_itersvd will be weird
+    svd_alg=IterSVD(),
+    rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-14)),
+)
+
 @testset "Non-truncacted SVD" begin
-    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R), r)
-    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R), r)
-    l_itersvd, g_itersvd = withgradient(
-        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R), r
-    )
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R), r)
+    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R), r)
+    l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R), r)
 
     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol
@@ -35,11 +39,9 @@ R = TensorMap(randn, space(r))
 end
 
 @testset "Truncated SVD with χ=$χ" begin
-    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, TensorKit.SVD(), R, trunc), r)
-    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(), R, trunc), r)
-    l_itersvd, g_itersvd = withgradient(
-        A -> lossfun(A, IterSVD(; alg_rrule=GMRES(; tol=adjoint_tol)), R, trunc), r
-    )
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R, trunc), r)
+    l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R, trunc), r)
+    l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R, trunc), r)
 
     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
@@ -47,16 +49,16 @@ end
 end
 
 # TODO: Add when Lorentzian broadening is implemented
-# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broad broadening" begin
+# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broadening broadening" begin
 #     l_fullsvd, g_fullsvd = withgradient(
-#         A -> lossfun(A, FullSVD(; lorentz_broad, R; trunc), r
+#         A -> lossfun(A, FullSVD(; lorentz_broadening, R; trunc), r
 #     )
-#     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broad), R; trunc), r)
+#     l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broadening), R; trunc), r)
 #     l_itersvd, g_itersvd = withgradient(
-#         A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broad), R; trunc), r
+#         A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broadening), R; trunc), r
 #     )
 
 #     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
 #     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
 #     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
-# end
+# end
\ No newline at end of file

From 7032ed0575df0d998417be7a17b7b1c1a91fb09d Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 10:45:36 +0200
Subject: [PATCH 19/27] Add IterSVD test for symmetric tensor with fallback

---
 src/utility/svd.jl        | 18 ++++++++++--------
 test/ctmrg/svd_wrapper.jl | 32 ++++++++++++++++++++++++++++----
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 9a2651ff..26b35b04 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -120,7 +120,9 @@ function ChainRulesCore.rrule(
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
 ) where {A}
-    return rrule(TensorKit.tsvd!; trunc, p, alg=alg.svd_alg)
+    fwd, tsvd!_pullback = rrule(TensorKit.tsvd!, t; trunc, p, alg=alg.svd_alg)
+    tsvd!_completesvd_pullback(Δsvd) = tsvd!_pullback(Δsvd)..., NoTangent()
+    return fwd, tsvd!_completesvd_pullback
 end
 
 """
@@ -142,7 +144,7 @@ function ChainRulesCore.rrule(
 ) where {A}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd!_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
@@ -183,11 +185,11 @@ function ChainRulesCore.rrule(
         end
         return NoTangent(), Δt, NoTangent()
     end
-    function tsvd_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+    function tsvd!_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
         return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd_sparsesvd_pullback
+    return (U, S, V, ϵ), tsvd!_sparsesvd_pullback
 end
 
 """
@@ -201,7 +203,7 @@ end
 
 # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
-    ::typeof(PEPSKit.tsvd),
+    ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
     alg::SVDrrule{A,NonTruncSVDAdjoint};
     trunc::TruncationScheme=notrunc(),
@@ -209,7 +211,7 @@ function ChainRulesCore.rrule(
 ) where {A}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd!_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
@@ -229,11 +231,11 @@ function ChainRulesCore.rrule(
         end
         return NoTangent(), Δt, NoTangent()
     end
-    function tsvd_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+    function tsvd!_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
         return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd_nontruncsvd_pullback
+    return (U, S, V, ϵ), tsvd!_nontruncsvd_pullback
 end
 
 function oldsvd_rev(
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index 0f3bd1da..1a65da04 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -4,6 +4,7 @@ using LinearAlgebra
 using TensorKit
 using KrylovKit
 using ChainRulesCore, Zygote
+using Accessors
 using PEPSKit
 
 # Gauge-invariant loss function
@@ -18,14 +19,14 @@ dtype = ComplexF64
 trunc = truncspace(ℂ^χ)
 # lorentz_broadening = 1e-12
 rtol = 1e-9
-r = TensorMap(randn, dtype, ℂ^m ← ℂ^n)
+r = TensorMap(randn, dtype, ℂ^m, ℂ^n)
 R = TensorMap(randn, space(r))
 
 full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint())
 old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint())
 iter_alg = SVDrrule(;  # Don't make adjoint tolerance too small, g_itersvd will be weird
-    svd_alg=IterSVD(),
-    rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-14)),
+    svd_alg=IterSVD(; alg=GKL(; krylovdim=50)),
+    rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-13)),
 )
 
 @testset "Non-truncacted SVD" begin
@@ -61,4 +62,27 @@ end
 #     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd 
 #     @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
 #     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
-# end
\ No newline at end of file
+# end
+
+symm_m, symm_n = 18, 24
+symm_space = Z2Space(0 => symm_m, 1 => symm_n)
+symm_trspace = truncspace(Z2Space(0 => symm_m ÷ 2, 1 => symm_n ÷ 3))
+symm_r = TensorMap(randn, dtype, symm_space, symm_space)
+symm_R = TensorMap(randn, dtype, space(symm_r))
+
+@testset "IterSVD of symmetric tensors" begin
+    l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, symm_R), symm_r)
+    l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, symm_R), symm_r)
+    @test l_itersvd ≈ l_fullsvd
+    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
+
+    l_fullsvd_tr, g_fullsvd_tr = withgradient(A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r)
+    l_itersvd_tr, g_itersvd_tr = withgradient(A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r)
+    @test l_itersvd_tr ≈ l_fullsvd_tr
+    @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol
+
+    iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4  # Do dense SVD in one block, sparse SVD in the other
+    l_itersvd_fb, g_itersvd_fb = withgradient(A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r)
+    @test l_itersvd_fb ≈ l_fullsvd_tr
+    @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol
+end

From d09561f9262871e9b85a6c2d589993aac9f4ca89 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 15:43:53 +0200
Subject: [PATCH 20/27] Formatting

---
 test/ctmrg/gaugefix.jl    |  8 ++++++--
 test/ctmrg/svd_wrapper.jl | 12 +++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl
index 195df266..a6883474 100644
--- a/test/ctmrg/gaugefix.jl
+++ b/test/ctmrg/gaugefix.jl
@@ -46,7 +46,9 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)))
+    alg = CTMRG(;
+        tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))
+    )
     alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation()
 
     ctm = leading_boundary(ctm, psi, alg)
@@ -69,7 +71,9 @@ end
     ctm = CTMRGEnv(psi; Venv=ctm_space)
 
     verbosity = 1
-    alg = CTMRG(; tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)))
+    alg = CTMRG(;
+        tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space))
+    )
     alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation()
 
     ctm = leading_boundary(ctm, psi, alg)
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index 1a65da04..b21c2af7 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -76,13 +76,19 @@ symm_R = TensorMap(randn, dtype, space(symm_r))
     @test l_itersvd ≈ l_fullsvd
     @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
 
-    l_fullsvd_tr, g_fullsvd_tr = withgradient(A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r)
-    l_itersvd_tr, g_itersvd_tr = withgradient(A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r)
+    l_fullsvd_tr, g_fullsvd_tr = withgradient(
+        A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r
+    )
+    l_itersvd_tr, g_itersvd_tr = withgradient(
+        A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r
+    )
     @test l_itersvd_tr ≈ l_fullsvd_tr
     @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol
 
     iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4  # Do dense SVD in one block, sparse SVD in the other
-    l_itersvd_fb, g_itersvd_fb = withgradient(A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r)
+    l_itersvd_fb, g_itersvd_fb = withgradient(
+        A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r
+    )
     @test l_itersvd_fb ≈ l_fullsvd_tr
     @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol
 end

From b3a0726491d46702808b74ae9f12e861d34e5a85 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 16:04:46 +0200
Subject: [PATCH 21/27] Fix missing cnext in ctmrg, update README example

---
 README.md               | 13 +++----------
 docs/src/index.md       | 13 +++----------
 src/algorithms/ctmrg.jl |  1 +
 3 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index a45818d6..e069d7a2 100644
--- a/README.md
+++ b/README.md
@@ -36,19 +36,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c
 using TensorKit, PEPSKit, KrylovKit, OptimKit
 
 # constructing the Hamiltonian:
-Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell
-physical_space = ComplexSpace(2)
-T = ComplexF64
-σx = TensorMap(T[0 1; 1 0], physical_space, physical_space)
-σy = TensorMap(T[0 im; -im 0], physical_space, physical_space)
-σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space)
-H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz)
-Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4)
+H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell
 
 # configuring the parameters
 D = 2
 chi = 20
-ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi))
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
@@ -60,7 +53,7 @@ opt_alg = PEPSOptimize(;
 # ground state search
 state = InfinitePEPS(2, D)
 ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg)
-result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm)
+result = fixedpoint(state, H, opt_alg, ctm)
 
 @show result.E # -0.6625...
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
index 305f6d84..ea76186c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -21,19 +21,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c
 using TensorKit, PEPSKit, KrylovKit, OptimKit
 
 # constructing the Hamiltonian:
-Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell
-physical_space = ComplexSpace(2)
-T = ComplexF64
-σx = TensorMap(T[0 1; 1 0], physical_space, physical_space)
-σy = TensorMap(T[0 im; -im 0], physical_space, physical_space)
-σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space)
-H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz)
-Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4)
+H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell
 
 # configuring the parameters
 D = 2
 chi = 20
-ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1)
+ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi))
 opt_alg = PEPSOptimize(;
     boundary_alg=ctm_alg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
@@ -45,7 +38,7 @@ opt_alg = PEPSOptimize(;
 # ground state search
 state = InfinitePEPS(2, D)
 ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg)
-result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm)
+result = fixedpoint(state, H, opt_alg, ctm)
 
 @show result.E # -0.6625...
 ```
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index d0073465..69d4a465 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -346,6 +346,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T}
 
     for col in 1:size(state, 2)
         cprev = _prev(col, size(state, 2))
+        cnext = _next(col, size(state, 2))
 
         # Compute projectors
         for row in 1:size(state, 1)

From 89ae0a4e48909a69b3508b1271f8acf784e86149 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 16:35:36 +0200
Subject: [PATCH 22/27] Rename DenseSVDAdjoint, update svd_wrapper test

---
 src/PEPSKit.jl            |  2 +-
 src/utility/svd.jl        | 16 ++++++++--------
 test/ctmrg/svd_wrapper.jl | 16 ++++++++--------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index 650195c5..5fbbf739 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -59,7 +59,7 @@ module Defaults
     const fpgrad_tol = 1e-6
 end
 
-export SVDrrule, IterSVD, OldSVD, CompleteSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint
+export SVDrrule, IterSVD, OldSVD, DenseSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint
 export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv
 export LocalOperator
 export expectation_value, costfun
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 26b35b04..b86df78d 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -9,13 +9,13 @@ using TensorKit:
 CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
 
 """
-    struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = CompleteSVDAdjoint())
+    struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint())
 
 Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`.
 """
 @kwdef struct SVDrrule{S,R}
     svd_alg::S = TensorKit.SVD()
-    rrule_alg::R = CompleteSVDAdjoint()  # TODO: should contain Lorentzian broadening eventually
+    rrule_alg::R = DenseSVDAdjoint()  # TODO: should contain Lorentzian broadening eventually
 end  # Keep truncation algorithm separate to be able to specify CTMRG dependent information
 
 """
@@ -105,18 +105,18 @@ function TensorKit._compute_svddata!(
 end
 
 """
-    struct CompleteSVDAdjoint(; lorentz_broadening = 0.0)
+    struct DenseSVDAdjoint(; lorentz_broadening = 0.0)
 
 Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD.
 """
-@kwdef struct CompleteSVDAdjoint
+@kwdef struct DenseSVDAdjoint
     lorentz_broadening::Float64 = 0.0
 end
 
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
-    alg::SVDrrule{A,CompleteSVDAdjoint};
+    alg::SVDrrule{A,DenseSVDAdjoint};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
 ) where {A}
@@ -130,15 +130,15 @@ end
 
 Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required.
 """
-@kwdef struct SparseSVDAdjoint
-    alg::Union{GMRES,BiCGStab,Arnoldi} = GMRES()
+@kwdef struct SparseSVDAdjoint{A}
+    alg::A = GMRES()
     lorentz_broadening::Float64 = 0.0
 end
 
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
-    alg::SVDrrule{A,SparseSVDAdjoint};
+    alg::SVDrrule{A,<:SparseSVDAdjoint};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
 ) where {A}
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index b21c2af7..7a69afea 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -22,7 +22,7 @@ rtol = 1e-9
 r = TensorMap(randn, dtype, ℂ^m, ℂ^n)
 R = TensorMap(randn, space(r))
 
-full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=CompleteSVDAdjoint())
+full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=DenseSVDAdjoint())
 old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint())
 iter_alg = SVDrrule(;  # Don't make adjoint tolerance too small, g_itersvd will be weird
     svd_alg=IterSVD(; alg=GKL(; krylovdim=50)),
@@ -35,8 +35,8 @@ iter_alg = SVDrrule(;  # Don't make adjoint tolerance too small, g_itersvd will
     l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R), r)
 
     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
-    @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) < rtol
-    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
+    @test g_fullsvd[1] ≈ g_oldsvd[1] rtol = rtol
+    @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol
 end
 
 @testset "Truncated SVD with χ=$χ" begin
@@ -45,8 +45,8 @@ end
     l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R, trunc), r)
 
     @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd
-    @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol
-    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
+    @test !isapprox(g_fullsvd[1], g_oldsvd[1]; rtol)
+    @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol
 end
 
 # TODO: Add when Lorentzian broadening is implemented
@@ -74,7 +74,7 @@ symm_R = TensorMap(randn, dtype, space(symm_r))
     l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, symm_R), symm_r)
     l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, symm_R), symm_r)
     @test l_itersvd ≈ l_fullsvd
-    @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol
+    @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol
 
     l_fullsvd_tr, g_fullsvd_tr = withgradient(
         A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r
@@ -83,12 +83,12 @@ symm_R = TensorMap(randn, dtype, space(symm_r))
         A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r
     )
     @test l_itersvd_tr ≈ l_fullsvd_tr
-    @test norm(g_fullsvd_tr[1] - g_itersvd_tr[1]) / norm(g_fullsvd_tr[1]) < rtol
+    @test g_fullsvd_tr[1] ≈ g_itersvd_tr[1] rtol = rtol
 
     iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4  # Do dense SVD in one block, sparse SVD in the other
     l_itersvd_fb, g_itersvd_fb = withgradient(
         A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r
     )
     @test l_itersvd_fb ≈ l_fullsvd_tr
-    @test norm(g_fullsvd_tr[1] - g_itersvd_fb[1]) / norm(g_fullsvd_tr[1]) < rtol
+    @test g_fullsvd_tr[1] ≈ g_itersvd_fb[1] rtol = rtol
 end

From 25d198cdc0c296046f11d008f03c3e950be5e572 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 16:50:01 +0200
Subject: [PATCH 23/27] Make CRCExt extension backwards compatible with v1.8

---
 src/utility/svd.jl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index b86df78d..8305ac9d 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -6,7 +6,13 @@ using TensorKit:
     _create_svdtensors,
     NoTruncation,
     TruncationSpace
-CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
+
+# Enables backwards compatibility without package extensions
+CRCExt = @static if isdefined(Base, :get_extension)
+    Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
+else
+    KrylovKit.KrylovKitChainRulesCoreExt
+end
 
 """
     struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint())

From 6b818e7008f5f7571412ef3308b02c78e25af7ac Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Tue, 9 Jul 2024 17:55:28 +0200
Subject: [PATCH 24/27] Replace SVDrrule with SVDAdjoint, clean up adjoint
 algorithms

---
 src/PEPSKit.jl            |  2 +-
 src/algorithms/ctmrg.jl   |  6 +--
 src/utility/svd.jl        | 83 +++++++++++----------------------------
 test/ctmrg/svd_wrapper.jl | 11 +++---
 4 files changed, 32 insertions(+), 70 deletions(-)

diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl
index 5fbbf739..8d8815ec 100644
--- a/src/PEPSKit.jl
+++ b/src/PEPSKit.jl
@@ -59,7 +59,7 @@ module Defaults
     const fpgrad_tol = 1e-6
 end
 
-export SVDrrule, IterSVD, OldSVD, DenseSVDAdjoint, SparseSVDAdjoint, NonTruncSVDAdjoint
+export SVDAdjoint, IterSVD, NonTruncSVDAdjoint
 export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv
 export LocalOperator
 export expectation_value, costfun
diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 69d4a465..69bef21a 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -18,8 +18,8 @@ kind of truncation scheme can be used. If `fixedspace` is true, the truncation s
 `truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding
 environment direction/unit cell entry.
 """
-@kwdef struct ProjectorAlg{S<:SVDrrule,T}
-    svd_alg::S = SVDrrule()
+@kwdef struct ProjectorAlg{S<:SVDAdjoint,T}
+    svd_alg::S = SVDAdjoint()
     trscheme::T = FixedSpaceTruncation()
     verbosity::Int = 0
 end
@@ -49,7 +49,7 @@ function CTMRG(;
     maxiter=Defaults.ctmrg_maxiter,
     miniter=Defaults.ctmrg_miniter,
     verbosity=0,
-    svd_alg=SVDrrule(),
+    svd_alg=SVDAdjoint(),
     trscheme=FixedSpaceTruncation(),
 )
     return CTMRG(
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 8305ac9d..c76ddb9f 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -15,13 +15,18 @@ else
 end
 
 """
-    struct SVDrrule(; svd_alg = TensorKit.SVD(), rrule_alg = DenseSVDAdjoint())
+    struct SVDAdjoint(; fwd_alg = TensorKit.SVD(), rrule_alg = nothing,
+                      broadening = nothing)
 
-Wrapper for a SVD algorithm `svd_alg` with a defined reverse rule `rrule_alg`.
+Wrapper for a SVD algorithm `fwd_alg` with a defined reverse rule `rrule_alg`.
+If `isnothing(rrule_alg)`, Zygote differentiates the forward call automatically.
+In case of degenerate singular values, one might need a `broadening` scheme which
+removes the divergences from the adjoint.
 """
-@kwdef struct SVDrrule{S,R}
-    svd_alg::S = TensorKit.SVD()
-    rrule_alg::R = DenseSVDAdjoint()  # TODO: should contain Lorentzian broadening eventually
+@kwdef struct SVDAdjoint{F,R,B}
+    fwd_alg::F = TensorKit.SVD()
+    rrule_alg::R = nothing
+    broadening::B = nothing
 end  # Keep truncation algorithm separate to be able to specify CTMRG dependent information
 
 """
@@ -34,9 +39,9 @@ SVD from `KrylovKit.svdsolve` is used.
 """
 PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...)
 function PEPSKit.tsvd!(
-    t::AbstractTensorMap, alg::SVDrrule; trunc::TruncationScheme=notrunc(), p::Real=2
+    t::AbstractTensorMap, alg::SVDAdjoint; trunc::TruncationScheme=notrunc(), p::Real=2
 )
-    return TensorKit.tsvd!(t; alg=alg.svd_alg, trunc, p)
+    return TensorKit.tsvd!(t; alg=alg.fwd_alg, trunc, p)
 end
 
 """
@@ -110,47 +115,16 @@ function TensorKit._compute_svddata!(
     return Udata, Sdata, Vdata, dims
 end
 
-"""
-    struct DenseSVDAdjoint(; lorentz_broadening = 0.0)
-
-Wrapper around the complete `TensorKit.tsvd!` rrule which requires computing the full SVD.
-"""
-@kwdef struct DenseSVDAdjoint
-    lorentz_broadening::Float64 = 0.0
-end
-
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
-    alg::SVDrrule{A,DenseSVDAdjoint};
+    alg::SVDAdjoint{IterSVD,R,B};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-) where {A}
-    fwd, tsvd!_pullback = rrule(TensorKit.tsvd!, t; trunc, p, alg=alg.svd_alg)
-    tsvd!_completesvd_pullback(Δsvd) = tsvd!_pullback(Δsvd)..., NoTangent()
-    return fwd, tsvd!_completesvd_pullback
-end
-
-"""
-    struct SparseSVDAdjoint(; lorentz_broadening = 0.0)
-
-Wrapper around the `KrylovKit.svdsolve` rrule where only the truncated decomposition is required.
-"""
-@kwdef struct SparseSVDAdjoint{A}
-    alg::A = GMRES()
-    lorentz_broadening::Float64 = 0.0
-end
-
-function ChainRulesCore.rrule(
-    ::typeof(PEPSKit.tsvd!),
-    t::AbstractTensorMap,
-    alg::SVDrrule{A,<:SparseSVDAdjoint};
-    trunc::TruncationScheme=notrunc(),
-    p::Real=2,
-) where {A}
+) where {R,B}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
-    function tsvd!_sparsesvd_pullback((ΔU, ΔS, ΔV, Δϵ))
+    function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ))
         Δt = similar(t)
         for (c, b) in blocks(Δt)
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
@@ -181,8 +155,8 @@ function ChainRulesCore.rrule(
                 minimal_info,
                 block(t, c),
                 :LR,
-                alg.svd_alg.alg,
-                alg.rrule_alg.alg,
+                alg.fwd_alg.alg,
+                alg.rrule_alg,
             )
             copyto!(
                 b,
@@ -191,11 +165,11 @@ function ChainRulesCore.rrule(
         end
         return NoTangent(), Δt, NoTangent()
     end
-    function tsvd!_sparsesvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
+    function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent})
         return NoTangent(), ZeroTangent(), NoTangent()
     end
 
-    return (U, S, V, ϵ), tsvd!_sparsesvd_pullback
+    return (U, S, V, ϵ), tsvd!_itersvd_pullback
 end
 
 """
@@ -203,18 +177,16 @@ end
 
 Old SVD adjoint that does not account for the truncated part of truncated SVDs.
 """
-@kwdef struct NonTruncSVDAdjoint
-    lorentz_broadening::Float64 = 0.0
-end
+struct NonTruncSVDAdjoint end
 
 # Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes)
 function ChainRulesCore.rrule(
     ::typeof(PEPSKit.tsvd!),
     t::AbstractTensorMap,
-    alg::SVDrrule{A,NonTruncSVDAdjoint};
+    alg::SVDAdjoint{F,NonTruncSVDAdjoint,B};
     trunc::TruncationScheme=notrunc(),
     p::Real=2,
-) where {A}
+) where {F,B}
     U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p)
 
     function tsvd!_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ))
@@ -223,16 +195,7 @@ function ChainRulesCore.rrule(
             Uc, Sc, Vc = block(U, c), block(S, c), block(V, c)
             ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c)
             copyto!(
-                b,
-                oldsvd_rev(
-                    Uc,
-                    Sc,
-                    Vc,
-                    ΔUc,
-                    ΔSc,
-                    ΔVc;
-                    lorentz_broadening=alg.rrule_alg.lorentz_broadening,
-                ),
+                b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broadening=alg.broadening)
             )
         end
         return NoTangent(), Δt, NoTangent()
diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl
index 7a69afea..3c17dc7d 100644
--- a/test/ctmrg/svd_wrapper.jl
+++ b/test/ctmrg/svd_wrapper.jl
@@ -22,12 +22,11 @@ rtol = 1e-9
 r = TensorMap(randn, dtype, ℂ^m, ℂ^n)
 R = TensorMap(randn, space(r))
 
-full_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=DenseSVDAdjoint())
-old_alg = SVDrrule(; svd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint())
-iter_alg = SVDrrule(;  # Don't make adjoint tolerance too small, g_itersvd will be weird
-    svd_alg=IterSVD(; alg=GKL(; krylovdim=50)),
-    rrule_alg=SparseSVDAdjoint(; alg=GMRES(; tol=1e-13)),
+full_alg = SVDAdjoint(; fwd_alg=TensorKit.SVD(), rrule_alg=nothing)
+old_alg = SVDAdjoint(;
+    fwd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint(), broadening=0.0
 )
+iter_alg = SVDAdjoint(; fwd_alg=IterSVD(), rrule_alg=GMRES(; tol=1e-13))  # Don't make adjoint tolerance too small, g_itersvd will be weird
 
 @testset "Non-truncacted SVD" begin
     l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R), r)
@@ -85,7 +84,7 @@ symm_R = TensorMap(randn, dtype, space(symm_r))
     @test l_itersvd_tr ≈ l_fullsvd_tr
     @test g_fullsvd_tr[1] ≈ g_itersvd_tr[1] rtol = rtol
 
-    iter_alg_fallback = @set iter_alg.svd_alg.fallback_threshold = 0.4  # Do dense SVD in one block, sparse SVD in the other
+    iter_alg_fallback = @set iter_alg.fwd_alg.fallback_threshold = 0.4  # Do dense SVD in one block, sparse SVD in the other
     l_itersvd_fb, g_itersvd_fb = withgradient(
         A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r
     )

From bb96664196850a61346388aef30a1a79e1cdc07b Mon Sep 17 00:00:00 2001
From: lkdvos <lukas.devos@ugent.be>
Date: Tue, 9 Jul 2024 19:02:48 +0200
Subject: [PATCH 25/27] Small cleanup

---
 src/utility/svd.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index c76ddb9f..685d050d 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -85,7 +85,6 @@ function TensorKit._compute_svddata!(
     dims = SectorDict{I,Int}()
     local Sdata
     for (c, b) in blocks(t)
-        x₀ = randn(eltype(b), size(b, 1))
         howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c)
 
         if howmany / minimum(size(b)) > alg.fallback_threshold  # Use dense SVD for small blocks
@@ -93,8 +92,10 @@ function TensorKit._compute_svddata!(
             Udata[c] = U[:, 1:howmany]
             Vdata[c] = V[1:howmany, :]
         else
+            x₀ = randn(eltype(b), size(b, 1))
             S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg)
             if info.converged < howmany  # Fall back to dense SVD if not properly converged
+                @warn "Iterative SVD did not converge for block $c, falling back to dense SVD"
                 U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD())
                 Udata[c] = U[:, 1:howmany]
                 Vdata[c] = V[1:howmany, :]
@@ -104,8 +105,8 @@ function TensorKit._compute_svddata!(
             end
         end
 
-        S = @view S[1:howmany]
-        if @isdefined Sdata # cannot easily infer the type of Σ, so use this construction
+        resize!(S, howmany)
+        if @isdefined Sdata
             Sdata[c] = S
         else
             Sdata = SectorDict(c => S)

From fa7a56aa41205f6194fa50983f61d8ad9cf1ead1 Mon Sep 17 00:00:00 2001
From: lkdvos <lukas.devos@ugent.be>
Date: Wed, 10 Jul 2024 10:24:58 +0200
Subject: [PATCH 26/27] Update minimal julia version 1.9

---
 .github/workflows/CI.yml | 2 +-
 Project.toml             | 2 +-
 src/utility/svd.jl       | 7 +------
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index aad659d0..0adc6bdc 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -21,7 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.8' # LTS version
+          - '1.9'
           - '1' # automatically expands to the latest stable 1.x release of Julia
         os:
           - ubuntu-latest
diff --git a/Project.toml b/Project.toml
index 8777406c..c4abe2b8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -34,7 +34,7 @@ TensorKit = "0.12.5"
 TensorOperations = "4"
 VectorInterface = "0.4"
 Zygote = "0.6"
-julia = "1.8"
+julia = "1.9"
 
 [extras]
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
diff --git a/src/utility/svd.jl b/src/utility/svd.jl
index 685d050d..740892ba 100644
--- a/src/utility/svd.jl
+++ b/src/utility/svd.jl
@@ -7,12 +7,7 @@ using TensorKit:
     NoTruncation,
     TruncationSpace
 
-# Enables backwards compatibility without package extensions
-CRCExt = @static if isdefined(Base, :get_extension)
-    Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
-else
-    KrylovKit.KrylovKitChainRulesCoreExt
-end
+const CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt)
 
 """
     struct SVDAdjoint(; fwd_alg = TensorKit.SVD(), rrule_alg = nothing,

From 6df8efc307920df258f384a366ea4ae010b8ae23 Mon Sep 17 00:00:00 2001
From: Paul Brehmer <paul.brehmer@univie.ac.at>
Date: Wed, 10 Jul 2024 08:43:44 +0000
Subject: [PATCH 27/27] Remove duplicate line in left_move

---
 src/algorithms/ctmrg.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl
index 69bef21a..b2908b5b 100644
--- a/src/algorithms/ctmrg.jl
+++ b/src/algorithms/ctmrg.jl
@@ -373,7 +373,6 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T}
             else
                 alg.trscheme
             end
-            @tensor QQ[-1 -2 -3; -4 -5 -6] := Q_sw[-1 -2 -3; 1 2 3] * Q_nw[1 2 3; -4 -5 -6]
             @autoopt @tensor QQ[χ_EB D_EBabove D_EBbelow; χ_ET D_ETabove D_ETbelow] :=
                 Q_sw[χ_EB D_EBabove D_EBbelow; χ D1 D2] *
                 Q_nw[χ D1 D2; χ_ET D_ETabove D_ETbelow]