
Refactor part of the PEPS optimization
lkdvos committed Mar 12, 2024
1 parent 6f14da5 commit 6816fc1
Showing 3 changed files with 107 additions and 85 deletions.
7 changes: 3 additions & 4 deletions examples/heisenberg.jl
@@ -1,6 +1,6 @@
using LinearAlgebra
using TensorKit, MPSKitModels, OptimKit
using PEPSKit
using PEPSKit, KrylovKit

# Square lattice Heisenberg Hamiltonian
# Sublattice rotation maps the couplings (1, 1, 1) → (-1, 1, -1), so the ground state fits a single-site unit cell
@@ -24,11 +24,10 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1)
χbond = 2
χenv = 20
ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=2)
alg = PEPSOptimize{LinSolve}(;
alg = PEPSOptimize(;
boundary_alg=ctmalg,
optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
fpgrad_tol=1e-6,
fpgrad_maxiter=100,
gradient_alg=GMRES(; tol=1e-6, maxiter=100),
reuse_env=true,
verbosity=2,
)
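With this refactor, the linear-problem settings move out of the `LinSolve` type parameter and the `fpgrad_tol`/`fpgrad_maxiter` fields and into a single `gradient_alg` object. A minimal sketch of the updated construction, assembled only from names that appear in this diff:

using TensorKit, OptimKit, KrylovKit
using PEPSKit

χenv = 20
ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=2)
alg = PEPSOptimize(;
    boundary_alg=ctmalg,
    optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
    gradient_alg=GMRES(; tol=1e-6, maxiter=100),  # any KrylovKit.LinearSolver should also fit
    reuse_env=true,
    verbosity=2,
)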
38 changes: 14 additions & 24 deletions examples/test_gradients.jl
@@ -1,6 +1,6 @@
using LinearAlgebra
using TensorKit, MPSKitModels, OptimKit
using PEPSKit
using PEPSKit, KrylovKit

# Square lattice Heisenberg Hamiltonian
function square_lattice_heisenberg(; Jx=-1.0, Jy=1.0, Jz=-1.0)
@@ -21,33 +21,23 @@ end
# Initialize PEPS and environment
H = square_lattice_heisenberg()
χbond = 2
χenv = 20
ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-12, miniter=4, maxiter=100, verbosity=2)
χenv = 16
boundary_alg = CTMRG(; trscheme=truncdim(χenv), tol=1e-12, miniter=4, maxiter=100, verbosity=2)
ψ = InfinitePEPS(2, χbond)
env = leading_boundary(ψ, ctmalg, CTMRGEnv(ψ; Venv=ℂ^χenv))
env = leading_boundary(ψ, boundary_alg, CTMRGEnv(ψ; Venv=ℂ^χenv))

# Compute CTM gradient in four different ways (set reuse_env=false to not mutate environment)
println("\nFP gradient using naive AD:")
alg_naive = PEPSOptimize{NaiveAD}(; boundary_alg=ctmalg, verbosity=2, reuse_env=false)
@time _, g_naive = PEPSKit.ctmrg_gradient((ψ, env), H, alg_naive)

println("\nFP gradient using explicit evaluation of the geometric sum:")
alg_geomsum = PEPSOptimize{GeomSum}(;
boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
)
@time _, g_geomsum = PEPSKit.ctmrg_gradient((ψ, env), H, alg_geomsum)

println("\nFP gradient using manual iteration of the linear problem:")
alg_maniter = PEPSOptimize{ManualIter}(;
boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
)
@time _, g_maniter = PEPSKit.ctmrg_gradient((ψ, env), H, alg_maniter)
function compute_grad(gradient_alg)
@info "FP gradient using $(gradient_alg):"
alg = PEPSOptimize(; boundary_alg, gradient_alg, reuse_env=false)
@time _, g = PEPSKit.ctmrg_gradient((ψ, env), H, alg)
return g
end

println("\nFP gradient using GMRES to solve the linear problem:")
alg_linsolve = PEPSOptimize{LinSolve}(;
boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
)
@time _, g_linsolve = PEPSKit.ctmrg_gradient((ψ, env), H, alg_linsolve)
g_naive = compute_grad(NaiveAD())
g_geomsum = compute_grad(GeomSum())
g_maniter = compute_grad(ManualIter())
g_linsolve = compute_grad(KrylovKit.GMRES())

@show norm(g_geomsum - g_naive) / norm(g_naive)
@show norm(g_maniter - g_naive) / norm(g_naive)
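Since the refactored `ctmrg_gradient` dispatches on any `KrylovKit.LinearSolver` (see peps_opt.jl below), the same helper can probe alternative solver settings without further changes. An illustrative sketch, with parameter values that are assumptions rather than part of the commit:

g_gmres_tight = compute_grad(KrylovKit.GMRES(; tol=1e-10, krylovdim=30))
@show norm(g_gmres_tight - g_naive) / norm(g_naive)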
147 changes: 90 additions & 57 deletions src/algorithms/peps_opt.jl
@@ -1,19 +1,42 @@
abstract type GradMode end

"""
NaiveAD <: GradMode
Gradient mode for CTMRG using AD.
"""
struct NaiveAD <: GradMode end
struct GeomSum <: GradMode end
struct ManualIter <: GradMode end
struct LinSolve <: GradMode end

"""
GeomSum <: GradMode
Gradient mode for CTMRG using explicit evaluation of the geometric sum.
"""
@kwdef struct GeomSum <: GradMode
maxiter::Int = Defaults.fpgrad_maxiter
tol::Real = Defaults.fpgrad_tol
verbosity::Int = 0
end

"""
ManualIter <: GradMode
Gradient mode for CTMRG using manual iteration to solve the linear problem.
"""
@kwdef struct ManualIter <: GradMode
maxiter::Int = Defaults.fpgrad_maxiter
tol::Real = Defaults.fpgrad_tol
verbosity::Int = 0
end

# Algorithm struct containing parameters for PEPS optimization
@kwdef struct PEPSOptimize{G<:GradMode}
@kwdef struct PEPSOptimize{G}
boundary_alg::CTMRG = CTMRG() # Algorithm to find boundary environment
optimizer::OptimKit.OptimizationAlgorithm = LBFGS(
4; maxiter=100, gradtol=1e-4, verbosity=2
)
reuse_env::Bool = true # Reuse environment of previous optimization as initial guess for next
fpgrad_tol::Float64 = Defaults.fpgrad_tol # Convergence tolerance for gradient FP iteration
fpgrad_maxiter::Int = Defaults.fpgrad_maxiter # Maximal number of FP iterations
gradient_alg::G = GeomSum() # Algorithm to solve gradient linear problem
verbosity::Int = 0
end
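# For instance (illustrative calls only, using the defaults declared above):
#   PEPSOptimize(; gradient_alg=ManualIter(; tol=1e-6, maxiter=100))
#   PEPSOptimize(; gradient_alg=KrylovKit.GMRES(; tol=1e-6))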

@@ -43,55 +66,70 @@ end
# Take real valued part of dot product
my_inner(_, η₁, η₂) = real(dot(η₁, η₂))

# Function returning energy and CTMRG gradient for each optimization step
function ctmrg_gradient(x, H, alg::PEPSOptimize)
peps, env = x
E, g = withgradient(ψ -> _ctmrgcostfun!(ψ, env, H, alg), peps)
∂E∂A = g[1] # Extract PEPS derivative from gradient tuple
if !(typeof(∂E∂A) <: InfinitePEPS) # NaiveAD returns NamedTuple as gradient instead of InfinitePEPS
#=
Evaluating the gradient of the cost function for CTMRG:
- The gradient of the cost function for CTMRG can be computed using automatic differentiation (AD) or explicit evaluation of the geometric sum.
- With AD, the gradient is computed by differentiating the cost function with respect to the PEPS tensors, including computing the environment tensors.
- With explicit evaluation of the geometric sum, the gradient is computed by differentiating the cost function with the environment kept fixed, and then manually adding the gradient contributions from the environments.
=#
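#=
Concretely, assuming an environment fixed point x = f(A, x) and energy F(A, x),
the total derivative reads
dF/dA = ∂F/∂A + ∂F/∂x (1 - ∂f/∂x)⁻¹ ∂f/∂A
      = ∂F/∂A + ∑ₙ ∂F/∂x (∂f/∂x)ⁿ ∂f/∂A,
which is precisely the linear problem / geometric sum that `fpgrad` evaluates below.
=#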

function ctmrg_gradient((peps, envs), H, alg::PEPSOptimize{NaiveAD})
E, g = withgradient(peps) do ψ
envs′ = leading_boundary(ψ, alg.boundary_alg, envs)
alg.reuse_env && (envs = envs′)
return costfun(ψ, envs′, H)
end

# AD returns namedtuple as gradient instead of InfinitePEPS
∂E∂A = g[1]
if !(∂E∂A isa InfinitePEPS)
# TODO: check if `reconstruct` works
∂E∂A = InfinitePEPS(∂E∂A.A)
end
@assert !isnan(norm(∂E∂A))
return E, ∂E∂A
end

# Helper function wrapping CTMRG run and cost function with custom adjoint
function _ctmrgcostfun!(peps, env::CTMRGEnv, H, alg::PEPSOptimize)
env′ = leading_boundary(peps, alg.boundary_alg, env)
alg.reuse_env && @diffset env = env′
return costfun(peps, env′, H)
end

# Energy gradient backwards rule (does not apply to NaiveAD gradient mode)
function ChainRulesCore.rrule(
::typeof(_ctmrgcostfun!), peps, env::CTMRGEnv, H, alg::PEPSOptimize{G}
) where {G<:Union{GeomSum,ManualIter,LinSolve}}
env = leading_boundary(peps, alg.boundary_alg, env)
function ctmrg_gradient((peps, envs), H, alg::PEPSOptimize{T}) where {T<:Union{GeomSum, ManualIter, KrylovKit.LinearSolver}}
# find partial gradients of costfun
env = leading_boundary(peps, alg.boundary_alg, envs)
E, Egrad = withgradient(costfun, peps, env, H)
∂F∂A = InfinitePEPS(Egrad[1]...)
∂F∂x = CTMRGEnv(Egrad[2]...)
_, envvjp = pullback(
(A, x) -> gauge_fix(x, ctmrg_iter(A, x, alg.boundary_alg)[1]), peps, env
)

# find partial gradients of single ctmrg iteration
_, envvjp = pullback(peps, envs) do A, x
return gauge_fix(x, ctmrg_iter(A, x, alg.boundary_alg)[1])
end
∂f∂A(x) = InfinitePEPS(envvjp(x)[1]...)
∂f∂x(x) = CTMRGEnv(envvjp(x)[2]...)

function costfun!_pullback(_)
# TODO: Add interface to choose y₀ and possibly kwargs that are passed to fpgrad?
# y₀ = CTMRGEnv(peps; Venv=space(env.edges[1])[1]) # This leads to slow convergence in LinSolve and gauge warnings
dx, = fpgrad(∂F∂x, ∂f∂x, ∂f∂A, ∂F∂x, alg)
return NoTangent(), ∂F∂A + dx, NoTangent(), NoTangent(), NoTangent(), NoTangent()
end
# evaluate the geometric sum
∂F∂envs = fpgrad(∂F∂x, ∂f∂x, ∂f∂A, ∂F∂x, alg.gradient_alg)

return E, costfun!_pullback
return E, ∂F∂A + ∂F∂envs
end

# Compute energy and energy gradient, by explicitly evaluating geometric series
function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::PEPSOptimize{GeomSum})
@doc """
fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y0, alg)
Compute the gradient of the cost function for CTMRG by solving the following equation:
dx = ∑ₙ (∂f∂x)ⁿ ∂f∂A dA = (1 - ∂f∂x)⁻¹ ∂f∂A dA
where `∂F∂x` is the gradient of the cost function with respect to the environment tensors,
`∂f∂x` is the partial gradient of the CTMRG iteration with respect to the environment tensors,
`∂f∂A` is the partial gradient of the CTMRG iteration with respect to the PEPS tensors, and
`y0` is the initial guess for the fixed-point iteration. The function returns the gradient
`dx` of the fixed-point iteration.
"""
fpgrad

function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::GeomSum)
g = ∂F∂x
dx = ∂f∂A(g) # n=0 term: ∂F∂x ∂f∂A
ϵ = 1.0
for i in 1:(alg.fpgrad_maxiter)
dx = ∂f∂A(g) # n = 0 term: ∂F∂x ∂f∂A
ϵ = 2 * alg.tol
for i in 1:(alg.maxiter)
g = ∂f∂x(g)
Σₙ = ∂f∂A(g)
dx += Σₙ
@@ -101,19 +139,19 @@ function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::PEPSOptimize{GeomSum})
@printf("Gradient iter: %3d ‖Σₙ‖: %.2e Δ‖Σₙ‖: %.2e\n", i, ϵnew, Δϵ)
ϵ = ϵnew

ϵ < alg.fpgrad_tol && break
if alg.verbosity > 0 && i == alg.fpgrad_maxiter
ϵ < alg.tol && break
if alg.verbosity > 0 && i == alg.maxiter
@warn "gradient fixed-point iteration reached maximal number of iterations at ‖Σₙ‖ = "
end
end
return dx, ϵ
return dx
end

# Manual iteration to solve gradient linear problem
function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{ManualIter})

function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::ManualIter)
y = deepcopy(y₀) # Do not mutate y₀
ϵ = 1.0
for i in 1:(alg.fpgrad_maxiter)
for i in 1:(alg.maxiter)
y′ = ∂F∂x + ∂f∂x(y)

norma = norm(y.corners[NORTHWEST])
@@ -125,24 +163,19 @@ function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{ManualIter
y = y′
ϵ = ϵnew

ϵ < alg.fpgrad_tol && break
if alg.verbosity > 0 && i == alg.fpgrad_maxiter
ϵ < alg.tol && break
if alg.verbosity > 0 && i == alg.maxiter
@warn "gradient fixed-point iteration reached maximal number of iterations at ‖Cᵢ₊₁-Cᵢ‖ = "
end
end
return ∂f∂A(y), ϵ
return ∂f∂A(y)
end

# Use KrylovKit.linsolve to solve gradient linear problem
function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{LinSolve})
grad_op(env) = env - ∂f∂x(env)
y, info = linsolve(
grad_op, ∂F∂x, y₀; rtol=alg.fpgrad_tol, maxiter=alg.fpgrad_maxiter, krylovdim=20
)

function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::KrylovKit.LinearSolver)
y, info = linsolve(∂f∂x, ∂F∂x, y₀, alg, 1, -1)
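# The trailing `1, -1` follow KrylovKit's linsolve(f, b, x₀, alg, a₀, a₁) convention,
# which solves a₀ * y + a₁ * f(y) = b, i.e. here (1 - ∂f∂x)(y) = ∂F∂x.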
if alg.verbosity > 0 && info.converged != 1
@warn("gradient fixed-point iteration reached maximal number of iterations:", info)
end

return ∂f∂A(y), info
end
return ∂f∂A(y)
end
