diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl
index 5923ea71..b9f7dc4e 100644
--- a/examples/heisenberg.jl
+++ b/examples/heisenberg.jl
@@ -1,6 +1,6 @@
 using LinearAlgebra
 using TensorKit, MPSKitModels, OptimKit
-using PEPSKit
+using PEPSKit, KrylovKit
 
 # Square lattice Heisenberg Hamiltonian
 # Sublattice-rotate to get (1, 1, 1) → (-1, 1, -1), transformed to GS with single-site unit cell
@@ -24,11 +24,10 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1)
 χbond = 2
 χenv = 20
 ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=2)
-alg = PEPSOptimize{LinSolve}(;
+alg = PEPSOptimize(;
     boundary_alg=ctmalg,
     optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2),
-    fpgrad_tol=1e-6,
-    fpgrad_maxiter=100,
+    gradient_alg=GMRES(;tol=1e-6, maxiter=100),
     reuse_env=true,
     verbosity=2,
 )
diff --git a/examples/test_gradients.jl b/examples/test_gradients.jl
index 2de80ce2..50d960dc 100644
--- a/examples/test_gradients.jl
+++ b/examples/test_gradients.jl
@@ -1,6 +1,6 @@
 using LinearAlgebra
 using TensorKit, MPSKitModels, OptimKit
-using PEPSKit
+using PEPSKit, KrylovKit
 
 # Square lattice Heisenberg Hamiltonian
 function square_lattice_heisenberg(; Jx=-1.0, Jy=1.0, Jz=-1.0)
@@ -21,33 +21,23 @@ end
 # Initialize PEPS and environment
 H = square_lattice_heisenberg()
 χbond = 2
-χenv = 20
-ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-12, miniter=4, maxiter=100, verbosity=2)
+χenv = 16
+boundary_alg = CTMRG(; trscheme=truncdim(χenv), tol=1e-12, miniter=4, maxiter=100, verbosity=2)
 ψ = InfinitePEPS(2, χbond)
-env = leading_boundary(ψ, ctmalg, CTMRGEnv(ψ; Venv=ℂ^χenv))
+env = leading_boundary(ψ, boundary_alg, CTMRGEnv(ψ; Venv=ℂ^χenv))
 
 # Compute CTM gradient in four different ways (set reuse_env=false to not mutate environment)
-println("\nFP gradient using naive AD:")
-alg_naive = PEPSOptimize{NaiveAD}(; boundary_alg=ctmalg, verbosity=2, reuse_env=false)
-@time _, g_naive = PEPSKit.ctmrg_gradient((ψ, env), H, alg_naive)
-
-println("\nFP gradient using explicit evaluation of the geometric sum:")
-alg_geomsum = PEPSOptimize{GeomSum}(;
-    boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
-)
-@time _, g_geomsum = PEPSKit.ctmrg_gradient((ψ, env), H, alg_geomsum)
-
-println("\nFP gradient using manual iteration of the linear problem:")
-alg_maniter = PEPSOptimize{ManualIter}(;
-    boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
-)
-@time _, g_maniter = PEPSKit.ctmrg_gradient((ψ, env), H, alg_maniter)
+function compute_grad(gradient_alg)
+    @info "FP gradient using $(gradient_alg):"
+    alg = PEPSOptimize(; boundary_alg, gradient_alg, reuse_env=false)
+    @time _, g = PEPSKit.ctmrg_gradient((ψ, env), H, alg)
+    return g
+end
 
-println("\nFP gradient using GMRES to solve the linear problem:")
-alg_linsolve = PEPSOptimize{LinSolve}(;
-    boundary_alg=ctmalg, fpgrad_tol=1e-6, fpgrad_maxiter=100, verbosity=2, reuse_env=false
-)
-@time _, g_linsolve = PEPSKit.ctmrg_gradient((ψ, env), H, alg_linsolve)
+g_naive = compute_grad(NaiveAD())
+g_geomsum = compute_grad(GeomSum())
+g_maniter = compute_grad(ManualIter())
+g_linsolve = compute_grad(KrylovKit.GMRES())
 
 @show norm(g_geomsum - g_naive) / norm(g_naive)
 @show norm(g_maniter - g_naive) / norm(g_naive)
diff --git a/src/algorithms/peps_opt.jl b/src/algorithms/peps_opt.jl
index 775f1242..9c0cf2e0 100644
--- a/src/algorithms/peps_opt.jl
+++ b/src/algorithms/peps_opt.jl
@@ -1,19 +1,42 @@
 abstract type GradMode end
 
+"""
+    NaiveAD <: GradMode
+
+Gradient mode for CTMRG using AD.
+"""
 struct NaiveAD <: GradMode end
-struct GeomSum <: GradMode end
-struct ManualIter <: GradMode end
-struct LinSolve <: GradMode end
+
+"""
+    GeomSum <: GradMode
+
+Gradient mode for CTMRG using explicit evaluation of the geometric sum.
+"""
+@kwdef struct GeomSum <: GradMode
+    maxiter::Int = Defaults.fpgrad_maxiter
+    tol::Real = Defaults.fpgrad_tol
+    verbosity::Int = 0
+end
+
+"""
+    ManualIter <: GradMode
+
+Gradient mode for CTMRG using manual iteration to solve the linear problem.
+"""
+@kwdef struct ManualIter <: GradMode
+    maxiter::Int = Defaults.fpgrad_maxiter
+    tol::Real = Defaults.fpgrad_tol
+    verbosity::Int = 0
+end
 
 # Algorithm struct containing parameters for PEPS optimization
-@kwdef struct PEPSOptimize{G<:GradMode}
+@kwdef struct PEPSOptimize{G}
     boundary_alg::CTMRG = CTMRG()  # Algorithm to find boundary environment
     optimizer::OptimKit.OptimizationAlgorithm = LBFGS(
         4; maxiter=100, gradtol=1e-4, verbosity=2
     )
     reuse_env::Bool = true  # Reuse environment of previous optimization as initial guess for next
-    fpgrad_tol::Float64 = Defaults.fpgrad_tol  # Convergence tolerance for gradient FP iteration
-    fpgrad_maxiter::Int = Defaults.fpgrad_maxiter  # Maximal number  of FP iterations
+    gradient_alg::G = GeomSum()  # Algorithm to solve gradient linear problem
     verbosity::Int = 0
 end
 
@@ -43,55 +66,70 @@ end
 # Take real valued part of dot product
 my_inner(_, η₁, η₂) = real(dot(η₁, η₂))
 
-# Function returning energy and CTMRG gradient for each optimization step
-function ctmrg_gradient(x, H, alg::PEPSOptimize)
-    peps, env = x
-    E, g = withgradient(ψ -> _ctmrgcostfun!(ψ, env, H, alg), peps)
-    ∂E∂A = g[1]  # Extract PEPS derivative from gradient tuple
-    if !(typeof(∂E∂A) <: InfinitePEPS)  # NaiveAD returns NamedTuple as gradient instead of InfinitePEPS
+#=
+Evaluating the gradient of the cost function for CTMRG:
+- The gradient of the cost function for CTMRG can be computed using automatic differentiation (AD) or explicit evaluation of the geometric sum.
+- With AD, the gradient is computed by differentiating the cost function with respect to the PEPS tensors, including computing the environment tensors.
+- With explicit evaluation of the geometric sum, the gradient is computed by differentiating the cost function with the environment kept fixed, and then manually adding the gradient contributions from the environments.
+=#
+
+function ctmrg_gradient((peps, envs), H, alg::PEPSOptimize{NaiveAD})
+    E, g = withgradient(peps) do ψ
+        envs = leading_boundary(ψ, alg.boundary_alg, envs)
+        alg.reuse_env && (envs = env′)
+        return costfun(ψ, envs, H)
+    end
+
+    # AD returns namedtuple as gradient instead of InfinitePEPS
+    ∂E∂A = g[1]
+    if !(∂E∂A isa InfinitePEPS)
+        # TODO: check if `reconstruct` works
         ∂E∂A = InfinitePEPS(∂E∂A.A)
     end
     @assert !isnan(norm(∂E∂A))
     return E, ∂E∂A
 end
 
-# Helper function wrapping CTMRG run and cost function with custom adjoint
-function _ctmrgcostfun!(peps, env::CTMRGEnv, H, alg::PEPSOptimize)
-    env′ = leading_boundary(peps, alg.boundary_alg, env)
-    alg.reuse_env && @diffset env = env′
-    return costfun(peps, env′, H)
-end
-
-# Energy gradient backwards rule (does not apply to NaiveAD gradient mode)
-function ChainRulesCore.rrule(
-    ::typeof(_ctmrgcostfun!), peps, env::CTMRGEnv, H, alg::PEPSOptimize{G}
-) where {G<:Union{GeomSum,ManualIter,LinSolve}}
-    env = leading_boundary(peps, alg.boundary_alg, env)
+function ctmrg_gradient((peps, envs), H, alg::PEPSOptimize{T}) where {T<:Union{GeomSum, ManualIter, KrylovKit.LinearSolver}}
+    # find partial gradients of costfun
+    env = leading_boundary(peps, alg.boundary_alg, envs)
     E, Egrad = withgradient(costfun, peps, env, H)
     ∂F∂A = InfinitePEPS(Egrad[1]...)
     ∂F∂x = CTMRGEnv(Egrad[2]...)
-    _, envvjp = pullback(
-        (A, x) -> gauge_fix(x, ctmrg_iter(A, x, alg.boundary_alg)[1]), peps, env
-    )
+
+    # find partial gradients of single ctmrg iteration
+    _, envvjp = pullback(peps, envs) do A, x
+        return gauge_fix(x, ctmrg_iter(A, x, alg.boundary_alg)[1])
+    end
     ∂f∂A(x) = InfinitePEPS(envvjp(x)[1]...)
     ∂f∂x(x) = CTMRGEnv(envvjp(x)[2]...)
 
-    function costfun!_pullback(_)
-        # TODO: Add interface to choose y₀ and possibly kwargs that are passed to fpgrad?
-        # y₀ = CTMRGEnv(peps; Venv=space(env.edges[1])[1])  # This leads to slow convergence in LinSolve and gauge warnings
-        dx, = fpgrad(∂F∂x, ∂f∂x, ∂f∂A, ∂F∂x, alg)
-        return NoTangent(), ∂F∂A + dx, NoTangent(), NoTangent(), NoTangent(), NoTangent()
-    end
+    # evaluate the geometric sum
+    ∂F∂envs = fpgrad(∂F∂x, ∂f∂x, ∂f∂A, ∂F∂x, alg.gradient_alg)
 
-    return E, costfun!_pullback
+    return E, ∂F∂A + ∂F∂envs
 end
 
-# Compute energy and energy gradient, by explicitly evaluating geometric series
-function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::PEPSOptimize{GeomSum})
+@doc """
+    fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y0, alg)
+
+Compute the gradient of the cost function for CTMRG by solving the following equation:
+
+dx = ∑ₙ (∂f∂x)ⁿ ∂f∂A dA = (1 - ∂f∂x)⁻¹ ∂f∂A dA
+
+where `∂F∂x` is the gradient of the cost function with respect to the PEPS tensors, `∂f∂x`
+is the partial gradient of the CTMRG iteration with respect to the environment tensors,
+`∂f∂A` is the partial gradient of the CTMRG iteration with respect to the PEPS tensors, and
+`y0` is the initial guess for the fixed-point iteration. The function returns the gradient
+`dx` of the fixed-point iteration.
+"""
+fpgrad
+
+function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::GeomSum)
     g = ∂F∂x
-    dx = ∂f∂A(g)  # n=0 term: ∂F∂x ∂f∂A
-    ϵ = 1.0
-    for i in 1:(alg.fpgrad_maxiter)
+    dx = ∂f∂A(g) # n = 0 term: ∂F∂x ∂f∂A
+    ϵ = 2 * alg.tol
+    for i in 1:(alg.maxiter)
         g = ∂f∂x(g)
         Σₙ = ∂f∂A(g)
         dx += Σₙ
@@ -101,19 +139,19 @@ function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, _, alg::PEPSOptimize{GeomSum})
             @printf("Gradient iter: %3d   ‖Σₙ‖: %.2e   Δ‖Σₙ‖: %.2e\n", i, ϵnew, Δϵ)
         ϵ = ϵnew
 
-        ϵ < alg.fpgrad_tol && break
-        if alg.verbosity > 0 && i == alg.fpgrad_maxiter
+        ϵ < alg.tol && break
+        if alg.verbosity > 0 && i == alg.maxiter
             @warn "gradient fixed-point iteration reached maximal number of iterations at ‖Σₙ‖ = $ϵ"
         end
     end
-    return dx, ϵ
+    return dx
 end
 
-# Manual iteration to solve gradient linear problem
-function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{ManualIter})
+
+function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::ManualIter)
     y = deepcopy(y₀)  # Do not mutate y₀
     ϵ = 1.0
-    for i in 1:(alg.fpgrad_maxiter)
+    for i in 1:(alg.maxiter)
         y′ = ∂F∂x + ∂f∂x(y)
 
         norma = norm(y.corners[NORTHWEST])
@@ -125,24 +163,19 @@ function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{ManualIter
         y = y′
         ϵ = ϵnew
 
-        ϵ < alg.fpgrad_tol && break
-        if alg.verbosity > 0 && i == alg.fpgrad_maxiter
+        ϵ < alg.tol && break
+        if alg.verbosity > 0 && i == alg.maxiter
             @warn "gradient fixed-point iteration reached maximal number of iterations at ‖Cᵢ₊₁-Cᵢ‖ = $ϵ"
         end
     end
-    return ∂f∂A(y), ϵ
+    return ∂f∂A(y)
 end
 
-# Use KrylovKit.linsolve to solve gradient linear problem
-function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::PEPSOptimize{LinSolve})
-    grad_op(env) = env - ∂f∂x(env)
-    y, info = linsolve(
-        grad_op, ∂F∂x, y₀; rtol=alg.fpgrad_tol, maxiter=alg.fpgrad_maxiter, krylovdim=20
-    )
-
+function fpgrad(∂F∂x, ∂f∂x, ∂f∂A, y₀, alg::KrylovKit.LinearSolver)
+    y, info = linsolve(∂f∂x, ∂F∂x, y₀, alg, 1, -1)
     if alg.verbosity > 0 && info.converged != 1
         @warn("gradient fixed-point iteration reached maximal number of iterations:", info)
     end
 
-    return ∂f∂A(y), info
-end
\ No newline at end of file
+    return ∂f∂A(y)
+end