diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aad659d0..0adc6bdc 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: version: - - '1.8' # LTS version + - '1.9' - '1' # automatically expands to the latest stable 1.x release of Julia os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index 85623d71..c4abe2b8 100644 --- a/Project.toml +++ b/Project.toml @@ -23,18 +23,18 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" Accessors = "0.1" ChainRulesCore = "1.0" Compat = "3.46, 4.2" -KrylovKit = "0.6, 0.7, 0.8" +KrylovKit = "0.8" LinearAlgebra = "1" MPSKit = "0.11" OptimKit = "0.3" Printf = "1" Random = "1" Statistics = "1" -TensorKit = "0.12.4" +TensorKit = "0.12.5" TensorOperations = "4" VectorInterface = "0.4" Zygote = "0.6" -julia = "1.8" +julia = "1.9" [extras] ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" diff --git a/README.md b/README.md index a45818d6..e069d7a2 100644 --- a/README.md +++ b/README.md @@ -36,19 +36,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c using TensorKit, PEPSKit, KrylovKit, OptimKit # constructing the Hamiltonian: -Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell -physical_space = ComplexSpace(2) -T = ComplexF64 -σx = TensorMap(T[0 1; 1 0], physical_space, physical_space) -σy = TensorMap(T[0 im; -im 0], physical_space, physical_space) -σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space) -H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz) -Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4) +H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell # configuring the parameters D = 2 chi = 20 -ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), @@ -60,7 +53,7 @@ opt_alg = PEPSOptimize(; # ground state search state = InfinitePEPS(2, D) ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg) -result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm) +result = fixedpoint(state, H, opt_alg, ctm) @show result.E # -0.6625... 
``` diff --git a/docs/src/index.md b/docs/src/index.md index 305f6d84..ea76186c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -21,19 +21,12 @@ For example, in order to obtain the groundstate of the 2D Heisenberg model, we c using TensorKit, PEPSKit, KrylovKit, OptimKit # constructing the Hamiltonian: -Jx, Jy, Jz = (-1, 1, -1) # sublattice rotation to obtain single-site unit cell -physical_space = ComplexSpace(2) -T = ComplexF64 -σx = TensorMap(T[0 1; 1 0], physical_space, physical_space) -σy = TensorMap(T[0 im; -im 0], physical_space, physical_space) -σz = TensorMap(T[1 0; 0 -1], physical_space, physical_space) -H = (Jx * σx ⊗ σx) + (Jy * σy ⊗ σy) + (Jz * σz ⊗ σz) -Heisenberg_hamiltonian = NLocalOperator{NearestNeighbor}(H / 4) +H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # sublattice rotation to obtain single-site unit cell # configuring the parameters D = 2 chi = 20 -ctm_alg = CTMRG(; trscheme = truncdim(chi), tol=1e-20, miniter=4, maxiter=100, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(chi)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), @@ -45,7 +38,7 @@ opt_alg = PEPSOptimize(; # ground state search state = InfinitePEPS(2, D) ctm = leading_boundary(CTMRGEnv(state; Venv=ComplexSpace(chi)), state, ctm_alg) -result = fixedpoint(state, Heisenberg_hamiltonian, opt_alg, ctm) +result = fixedpoint(state, H, opt_alg, ctm) @show result.E # -0.6625... ``` diff --git a/examples/boundary_mps.jl b/examples/boundary_mps.jl index abd52e17..8893f63e 100644 --- a/examples/boundary_mps.jl +++ b/examples/boundary_mps.jl @@ -32,9 +32,7 @@ mps, envs, ϵ = leading_boundary(mps, T, VUMPS()) N = abs(prod(expectation_value(mps, T))) # This can be compared to the result obtained using the CTMRG algorithm -ctm = leading_boundary( - peps, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps; Venv=ComplexSpace(20)) -) +ctm = leading_boundary(peps, CTMRG(; verbosity=1), CTMRGEnv(peps; Venv=ComplexSpace(20))) N´ = abs(norm(peps, ctm)) @show abs(N - N´) / N @@ -55,9 +53,7 @@ mps2 = PEPSKit.initializeMPS(T2, fill(ComplexSpace(20), 2, 2)) mps2, envs2, ϵ = leading_boundary(mps2, T2, VUMPS()) N2 = abs(prod(expectation_value(mps2, T2))) -ctm2 = leading_boundary( - peps2, CTMRG(; verbosity=1, fixedspace=true), CTMRGEnv(peps2; Venv=ComplexSpace(20)) -) +ctm2 = leading_boundary(peps2, CTMRG(; verbosity=1), CTMRGEnv(peps2; Venv=ComplexSpace(20))) N2´ = abs(norm(peps2, ctm2)) @show abs(N2 - N2´) / N2 diff --git a/examples/heisenberg.jl b/examples/heisenberg.jl index 0cfd8c06..a43f313c 100644 --- a/examples/heisenberg.jl +++ b/examples/heisenberg.jl @@ -11,9 +11,9 @@ H = square_lattice_heisenberg(; Jx=-1, Jy=1, Jz=-1) # Parameters χbond = 2 χenv = 20 -ctmalg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1) -alg = PEPSOptimize(; - boundary_alg=ctmalg, +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv)) +opt_alg = PEPSOptimize(; + boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-4, verbosity=2), gradient_alg=GMRES(; tol=1e-6, maxiter=100), reuse_env=true, @@ -27,6 +27,6 @@ alg = PEPSOptimize(; # E/N = −0.6694421, which is a QMC estimate from https://arxiv.org/abs/1101.3281. # Of course there is a noticable bias for small χbond and χenv. 
ψ₀ = InfinitePEPS(2, χbond) -env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctmalg) -result = fixedpoint(ψ₀, H, alg, env₀) +env₀ = leading_boundary(CTMRGEnv(ψ₀; Venv=ℂ^χenv), ψ₀, ctm_alg) +result = fixedpoint(ψ₀, H, opt_alg, env₀) @show result.E diff --git a/src/PEPSKit.jl b/src/PEPSKit.jl index 46879a20..8d8815ec 100644 --- a/src/PEPSKit.jl +++ b/src/PEPSKit.jl @@ -9,7 +9,7 @@ using TensorKit, KrylovKit, MPSKit, OptimKit, TensorOperations using ChainRulesCore, Zygote include("utility/util.jl") -include("utility/eigsolve.jl") +include("utility/svd.jl") include("utility/rotations.jl") include("utility/hook_pullback.jl") include("utility/autoopt.jl") @@ -59,12 +59,14 @@ module Defaults const fpgrad_tol = 1e-6 end -export CTMRG, CTMRGEnv +export SVDAdjoint, IterSVD, NonTruncSVDAdjoint +export FixedSpaceTruncation, ProjectorAlg, CTMRG, CTMRGEnv export LocalOperator export expectation_value, costfun export leading_boundary export PEPSOptimize, GeomSum, ManualIter, LinSolve export fixedpoint + export InfinitePEPS, InfiniteTransferPEPS export InfinitePEPO, InfiniteTransferPEPO export initializeMPS, initializePEPS diff --git a/src/algorithms/ctmrg.jl b/src/algorithms/ctmrg.jl index b86ccfc8..b2908b5b 100644 --- a/src/algorithms/ctmrg.jl +++ b/src/algorithms/ctmrg.jl @@ -1,24 +1,60 @@ +""" + FixedSpaceTruncation <: TensorKit.TruncationScheme + +CTMRG-specific truncation scheme for `tsvd` which keeps the bond space on which the SVD +is performed fixed. Since different environment directions and unit cell entries might +have different spaces, this truncation style is different from `TruncationSpace`. +""" +struct FixedSpaceTruncation <: TensorKit.TruncationScheme end + +# TODO: add option for different projector styles (half-infinite, full-infinite, etc.) +""" + struct ProjectorAlg(; svd_alg = SVDAdjoint(), trscheme = FixedSpaceTruncation(), + verbosity = 0) + +Algorithm struct collecting all projector-related parameters. The truncation scheme has to be +a `TensorKit.TruncationScheme`, and some SVD algorithms might have further restrictions on what +kind of truncation scheme can be used. If `trscheme` is a `FixedSpaceTruncation`, the truncation scheme is set to +`truncspace(V)` where `V` is the environment bond space, adjusted to the corresponding +environment direction/unit cell entry. +""" +@kwdef struct ProjectorAlg{S<:SVDAdjoint,T} + svd_alg::S = SVDAdjoint() + trscheme::T = FixedSpaceTruncation() + verbosity::Int = 0 +end + # TODO: add abstract Algorithm type? """ - struct CTMRG(; trscheme = TensorKit.notrunc(), tol = Defaults.ctmrg_tol, - maxiter = Defaults.ctmrg_maxiter, miniter = Defaults.ctmrg_miniter, - verbosity = 0, fixedspace = false) + CTMRG(; tol=Defaults.ctmrg_tol, maxiter=Defaults.ctmrg_maxiter, + miniter=Defaults.ctmrg_miniter, verbosity=0, + svd_alg=SVDAdjoint(), trscheme=FixedSpaceTruncation()) Algorithm struct that represents the CTMRG algorithm for contracting infinite PEPS. -The projector bond dimensions are set via `trscheme` which controls the truncation -properties inside of `TensorKit.tsvd`. Each CTMRG run is converged up to `tol` -where the singular value convergence of the corners as well as the norm is checked. -The maximal and minimal number of CTMRG iterations is set with `maxiter` and `miniter`. -Different levels of output information are printed depending on `verbosity` (0, 1 or 2). -Regardless of the truncation scheme, the space can be kept fixed with `fixedspace`.
+Each CTMRG run is converged up to `tol` where the singular value convergence of the +corners as well as the norm is checked. The maximal and minimal number of CTMRG iterations +is set with `maxiter` and `miniter`. Different levels of output information are printed +depending on `verbosity` (0, 1 or 2). The projectors are computed from `svd_alg` SVDs +where the truncation scheme is set via `trscheme`. """ -@kwdef struct CTMRG - trscheme::TruncationScheme = TensorKit.notrunc() - tol::Float64 = Defaults.ctmrg_tol - maxiter::Int = Defaults.ctmrg_maxiter - miniter::Int = Defaults.ctmrg_miniter - verbosity::Int = 0 - fixedspace::Bool = false +struct CTMRG + tol::Float64 + maxiter::Int + miniter::Int + verbosity::Int + projector_alg::ProjectorAlg +end +function CTMRG(; + tol=Defaults.ctmrg_tol, + maxiter=Defaults.ctmrg_maxiter, + miniter=Defaults.ctmrg_miniter, + verbosity=0, + svd_alg=SVDAdjoint(), + trscheme=FixedSpaceTruncation(), +) + return CTMRG( + tol, maxiter, miniter, verbosity, ProjectorAlg(; svd_alg, trscheme, verbosity) + ) end """ @@ -39,6 +75,7 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG) for i in 1:(alg.maxiter) env, ϵ = ctmrg_iter(state, env, alg) # Grow and renormalize in all 4 directions + conv_condition, normold, CSold, TSold, ϵ = ignore_derivatives() do # Compute convergence criteria and take max (TODO: How should we handle logging all of this?) Δϵ = abs((ϵold - ϵ) / ϵold) @@ -86,9 +123,7 @@ function MPSKit.leading_boundary(envinit, state, alg::CTMRG) end # Do one final iteration that does not change the spaces - alg_fixed = CTMRG(; - alg.trscheme, alg.tol, alg.maxiter, alg.miniter, alg.verbosity, fixedspace=true - ) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() env′, = ctmrg_iter(state, env, alg_fixed) envfix = gauge_fix(env, env′) check_elementwise_convergence(env, envfix; atol=alg.tol^(1 / 2)) || @@ -288,7 +323,7 @@ function ctmrg_iter(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} ϵ = 0.0 for _ in 1:4 - env, _, _, ϵ₀ = left_move(state, env, alg) + env, _, _, ϵ₀ = left_move(state, env, alg.projector_alg) state = rotate_north(state, EAST) env = rotate_north(env, EAST) ϵ = max(ϵ, ϵ₀) @@ -303,7 +338,7 @@ end Grow, project and renormalize the environment `env` in west direction. Return the updated environment as well as the projectors and truncation error. 
""" -function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} +function left_move(state, env::CTMRGEnv{C,T}, alg::ProjectorAlg) where {C,T} corners::typeof(env.corners) = copy(env.corners) edges::typeof(env.edges) = copy(env.edges) ϵ = 0.0 @@ -311,6 +346,7 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} for col in 1:size(state, 2) cprev = _prev(col, size(state, 2)) + cnext = _next(col, size(state, 2)) # Compute projectors for row in 1:size(state, 1) @@ -332,15 +368,15 @@ function left_move(state, env::CTMRGEnv{C,T}, alg::CTMRG) where {C,T} ) # SVD half-infinite environment - trscheme = if alg.fixedspace == true - truncspace(space(env.edges[WEST, row, col], 1)) + trscheme = if alg.trscheme isa FixedSpaceTruncation + truncspace(space(env.edges[WEST, row, cnext], 1)) else alg.trscheme end @autoopt @tensor QQ[χ_EB D_EBabove D_EBbelow; χ_ET D_ETabove D_ETbelow] := Q_sw[χ_EB D_EBabove D_EBbelow; χ D1 D2] * Q_nw[χ D1 D2; χ_ET D_ETabove D_ETbelow] - U, S, V, ϵ_local = tsvd!(QQ; trunc=trscheme, alg=TensorKit.SVD()) + U, S, V, ϵ_local = PEPSKit.tsvd!(QQ, alg.svd_alg; trunc=trscheme) ϵ = max(ϵ, ϵ_local / norm(S)) # TODO: check if we can just normalize enlarged corners s.t. trunc behaves a bit better diff --git a/src/utility/eigsolve.jl b/src/utility/eigsolve.jl deleted file mode 100644 index 975adf49..00000000 --- a/src/utility/eigsolve.jl +++ /dev/null @@ -1,251 +0,0 @@ -# Copied from Jutho/KrylovKit.jl/pull/56, with minor tweaks - -function ChainRulesCore.rrule( - ::typeof(eigsolve), A::AbstractMatrix, x₀, howmany, which, alg -) - (vals, vecs, info) = eigsolve(A, x₀, howmany, which, alg) - project_A = ProjectTo(A) - T = eltype(vecs[1]) # will be real for real symmetric problems and complex otherwise - - function eigsolve_pullback(ΔX) - _Δvals = unthunk(ΔX[1]) - _Δvecs = unthunk(ΔX[2]) - - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂howmany = NoTangent() - ∂which = NoTangent() - ∂alg = NoTangent() - if _Δvals isa AbstractZero && _Δvecs isa AbstractZero - ∂A = ZeroTangent() - return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg - end - - if _Δvals isa AbstractZero - Δvals = fill(NoTangent(), length(Δvecs)) - else - Δvals = _Δvals - end - if _Δvecs isa AbstractZero - Δvecs = fill(NoTangent(), length(Δvals)) - else - Δvecs = _Δvecs - end - - @assert length(Δvals) == length(Δvecs) - @assert length(Δvals) <= length(vals) - - # Determine algorithm to solve linear problem - # TODO: Is there a better choice? Should we make this user configurable? 
- linalg = GMRES(; - tol=alg.tol, krylovdim=alg.krylovdim, maxiter=alg.maxiter, orth=alg.orth - ) - - ws = similar(vecs, length(Δvecs)) - for i in 1:length(Δvecs) - Δλ = Δvals[i] - Δv = Δvecs[i] - λ = vals[i] - v = vecs[i] - - # First threat special cases - if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution - ws[i] = Δv # some kind of zero - continue - end - if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution - ws[i] = Δλ * v - continue - end - - # General case : - if isa(Δv, AbstractZero) - b = RecursiveVec(zero(T) * v, T[Δλ]) - else - @assert isa(Δv, typeof(v)) - b = RecursiveVec(Δv, T[Δλ]) - end - - if i > 1 && - eltype(A) <: Real && - vals[i] == conj(vals[i - 1]) && - Δvals[i] == conj(Δvals[i - 1]) && - vecs[i] == conj(vecs[i - 1]) && - Δvecs[i] == conj(Δvecs[i - 1]) - ws[i] = conj(ws[i - 1]) - continue - end - - w, reverse_info = let λ = λ, v = v, Aᴴ = A' - linsolve(b, zero(T) * b, linalg) do x - x1, x2 = x - γ = 1 - # γ can be chosen freely and does not affect the solution theoretically - # The current choice guarantees that the extended matrix is Hermitian if A is - # TODO: is this the best choice in all cases? - y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, A' * x1)) - y2 = T[-dot(v, x1)] - return RecursiveVec(y1, y2) - end - end - if info.converged >= i && reverse_info.converged == 0 - @warn "The cotangent linear problem did not converge, whereas the primal eigenvalue problem did." - end - ws[i] = w[1] - end - - if A isa StridedMatrix - ∂A = InplaceableThunk( - Ā -> _buildĀ!(Ā, ws, vecs), @thunk(_buildĀ!(zero(A), ws, vecs)) - ) - else - ∂A = @thunk(project_A(_buildĀ!(zero(A), ws, vecs))) - end - return ∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg - end - return (vals, vecs, info), eigsolve_pullback -end - -function _buildĀ!(Ā, ws, vs) - for i in 1:length(ws) - w = ws[i] - v = vs[i] - if !(w isa AbstractZero) - if eltype(Ā) <: Real && eltype(w) <: Complex - mul!(Ā, _realview(w), _realview(v)', -1, 1) - mul!(Ā, _imagview(w), _imagview(v)', -1, 1) - else - mul!(Ā, w, v', -1, 1) - end - end - end - return Ā -end -function _realview(v::AbstractVector{Complex{T}}) where {T} - return view(reinterpret(T, v), 2 * (1:length(v)) .- 1) -end -function _imagview(v::AbstractVector{Complex{T}}) where {T} - return view(reinterpret(T, v), 2 * (1:length(v))) -end - -function ChainRulesCore.rrule( - config::RuleConfig{>:HasReverseMode}, - ::typeof(eigsolve), - A::AbstractMatrix, - x₀, - howmany, - which, - alg, -) - return ChainRulesCore.rrule(eigsolve, A, x₀, howmany, which, alg) -end - -function ChainRulesCore.rrule( - config::RuleConfig{>:HasReverseMode}, ::typeof(eigsolve), f, x₀, howmany, which, alg -) - (vals, vecs, info) = eigsolve(f, x₀, howmany, which, alg) - resize!(vecs, howmany) - resize!(vals, howmany) - T = typeof(dot(vecs[1], vecs[1])) - f_pullbacks = map(x -> rrule_via_ad(config, f, x)[2], vecs) - function eigsolve_pullback(ΔX) - _Δvals = unthunk(ΔX[1]) - _Δvecs = unthunk(ΔX[2]) - - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂howmany = NoTangent() - ∂which = NoTangent() - ∂alg = NoTangent() - if _Δvals isa AbstractZero && _Δvecs isa AbstractZero - ∂A = ZeroTangent() - return (∂self, ∂A, ∂x₀, ∂howmany, ∂which, ∂alg) - end - - if _Δvals isa AbstractZero - Δvals = fill(NoTangent(), howmany) - else - Δvals = _Δvals - end - if _Δvecs isa AbstractZero - Δvecs = fill(NoTangent(), howmany) - else - Δvecs = _Δvecs - end - - # filter ZeroTangents, added compared to Jutho/KrylovKit.jl/pull/56 - Δvecs = filter(x -> !(x isa AbstractZero), Δvecs) - @assert length(Δvals) 
== length(Δvecs) - - # Determine algorithm to solve linear problem - # TODO: Is there a better choice? Should we make this user configurable? - linalg = GMRES(; - tol=alg.tol, - krylovdim=alg.krylovdim + 10, - maxiter=alg.maxiter * 10, - orth=alg.orth, - ) - # linalg = BiCGStab(; - # tol = alg.tol, - # maxiter = alg.maxiter*alg.krylovdim, - # ) - - ws = similar(Δvecs) - for i in 1:length(Δvecs) - Δλ = Δvals[i] - Δv = Δvecs[i] - λ = vals[i] - v = vecs[i] - - # First threat special cases - if isa(Δv, AbstractZero) && isa(Δλ, AbstractZero) # no contribution - ws[i] = 0 * v # some kind of zero - continue - end - if isa(Δv, AbstractZero) && isa(alg, Lanczos) # simple contribution - ws[i] = Δλ * v - continue - end - - # General case : - if isa(Δv, AbstractZero) - b = RecursiveVec(zero(T) * v, T[-Δλ]) - else - @assert isa(Δv, typeof(v)) - b = RecursiveVec(-Δv, T[-Δλ]) - end - - # TODO: is there any analogy to this for general vector-like user types - # if i > 1 && eltype(A) <: Real && - # vals[i] == conj(vals[i-1]) && Δvals[i] == conj(Δvals[i-1]) && - # vecs[i] == conj(vecs[i-1]) && Δvecs[i] == conj(Δvecs[i-1]) - # - # ws[i] = conj(ws[i-1]) - # continue - # end - - w, reverse_info = let λ = λ, v = v, fᴴ = x -> f_pullbacks[i](x)[2] - linsolve(b, zero(T) * b, linalg) do x - x1, x2 = x - γ = 1 - # γ can be chosen freely and does not affect the solution theoretically - # The current choice guarantees that the extended matrix is Hermitian if A is - # TODO: is this the best choice in all cases? - y1 = axpy!(-γ * x2[], v, axpy!(-conj(λ), x1, fᴴ(x1))) - y2 = T[-dot(v, x1)] - return RecursiveVec(y1, y2) - end - end - if info.converged >= i && reverse_info.converged == 0 - @warn "The cotangent linear problem ($i) did not converge, whereas the primal eigenvalue problem did." reverse_info b - end - ws[i] = w[1] - end - ∂f = f_pullbacks[1](ws[1])[1] - for i in 2:length(ws) - ∂f = VectorInterface.add!!(∂f, f_pullbacks[i](ws[i])[1]) - end - return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg - end - return (vals, vecs, info), eigsolve_pullback -end diff --git a/src/utility/svd.jl b/src/utility/svd.jl new file mode 100644 index 00000000..740892ba --- /dev/null +++ b/src/utility/svd.jl @@ -0,0 +1,268 @@ +using TensorKit: + SectorDict, + _tsvd!, + _empty_svdtensors, + _compute_svddata!, + _create_svdtensors, + NoTruncation, + TruncationSpace + +const CRCExt = Base.get_extension(KrylovKit, :KrylovKitChainRulesCoreExt) + +""" + struct SVDAdjoint(; fwd_alg = TensorKit.SVD(), rrule_alg = nothing, + broadening = nothing) + +Wrapper for a SVD algorithm `fwd_alg` with a defined reverse rule `rrule_alg`. +If `isnothing(rrule_alg)`, Zygote differentiates the forward call automatically. +In case of degenerate singular values, one might need a `broadening` scheme which +removes the divergences from the adjoint. +""" +@kwdef struct SVDAdjoint{F,R,B} + fwd_alg::F = TensorKit.SVD() + rrule_alg::R = nothing + broadening::B = nothing +end # Keep truncation algorithm separate to be able to specify CTMRG dependent information + +""" + PEPSKit.tsvd(t::AbstractTensorMap, alg; trunc=notrunc(), p=2) + +Wrapper around `TensorKit.tsvd` which dispatches on the `alg` argument. +This is needed since a custom adjoint for `PEPSKit.tsvd` may be defined, +depending on the algorithm. E.g., for `IterSVD` the adjoint for a truncated +SVD from `KrylovKit.svdsolve` is used. +""" +PEPSKit.tsvd(t::AbstractTensorMap, alg; kwargs...) = PEPSKit.tsvd!(copy(t), alg; kwargs...) 
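A minimal usage sketch of this wrapper (not part of the diff itself; the tensor, spaces, and tolerance below are assumptions mirroring the new `test/ctmrg/svd_wrapper.jl` further down):

```julia
using TensorKit, KrylovKit, PEPSKit

# Hypothetical 20×30 complex tensor map; sizes chosen only for illustration
t = TensorMap(randn, ComplexF64, ℂ^20, ℂ^30)

# Iterative forward SVD (KrylovKit GKL) with a GMRES-based reverse rule
alg = SVDAdjoint(; fwd_alg=IterSVD(), rrule_alg=GMRES(; tol=1e-13))

# IterSVD expects a space-like truncation (`truncspace`) or no truncation at all
U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc=truncspace(ℂ^12))
```

Within CTMRG this call happens inside `left_move`, where `trunc` is supplied by the `ProjectorAlg`.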
+function PEPSKit.tsvd!( + t::AbstractTensorMap, alg::SVDAdjoint; trunc::TruncationScheme=notrunc(), p::Real=2 +) + return TensorKit.tsvd!(t; alg=alg.fwd_alg, trunc, p) +end + +""" + struct IterSVD(; alg = KrylovKit.GKL(), fallback_threshold = Inf) + +Iterative SVD solver based on KrylovKit's GKL algorithm, adapted to (symmetric) tensors. +The number of targeted singular values is set via the `TruncationSpace` in `ProjectorAlg`. +In particular, this makes it possible to specify the targeted singular values block-wise. +In case the symmetry block is too small compared to the number of singular values, or +the iterative SVD does not converge, the algorithm falls back to a dense SVD. +""" +@kwdef struct IterSVD + alg::KrylovKit.GKL = KrylovKit.GKL(; tol=1e-14, krylovdim=25) + fallback_threshold::Float64 = Inf +end + +# Compute SVD data block-wise using KrylovKit algorithm +function TensorKit._tsvd!( + t, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace}, p::Real=2 +) + # early return + if isempty(blocksectors(t)) + truncerr = zero(real(scalartype(t))) + return _empty_svdtensors(t)..., truncerr + end + + Udata, Σdata, Vdata, dims = _compute_svddata!(t, alg, trunc) + U, S, V = _create_svdtensors(t, Udata, Σdata, Vdata, spacetype(t)(dims)) + truncerr = trunc isa NoTruncation ? abs(zero(scalartype(t))) : norm(U * S * V - t, p) + + return U, S, V, truncerr +end +function TensorKit._compute_svddata!( + t::TensorMap, alg::IterSVD, trunc::Union{NoTruncation,TruncationSpace} +) + InnerProductStyle(t) === EuclideanProduct() || throw_invalid_innerproduct(:tsvd!) + I = sectortype(t) + A = storagetype(t) + Udata = SectorDict{I,A}() + Vdata = SectorDict{I,A}() + dims = SectorDict{I,Int}() + local Sdata + for (c, b) in blocks(t) + howmany = trunc isa NoTruncation ? minimum(size(b)) : blockdim(trunc.space, c) + + if howmany / minimum(size(b)) > alg.fallback_threshold # Use dense SVD for small blocks + U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + Udata[c] = U[:, 1:howmany] + Vdata[c] = V[1:howmany, :] + else + x₀ = randn(eltype(b), size(b, 1)) + S, lvecs, rvecs, info = KrylovKit.svdsolve(b, x₀, howmany, :LR, alg.alg) + if info.converged < howmany # Fall back to dense SVD if not properly converged + @warn "Iterative SVD did not converge for block $c, falling back to dense SVD" + U, S, V = TensorKit.MatrixAlgebra.svd!(b, TensorKit.SVD()) + Udata[c] = U[:, 1:howmany] + Vdata[c] = V[1:howmany, :] + else # Slice in case more values were converged than requested + Udata[c] = stack(view(lvecs, 1:howmany)) + Vdata[c] = stack(conj, view(rvecs, 1:howmany); dims=1) + end + end + + resize!(S, howmany) + if @isdefined Sdata + Sdata[c] = S + else + Sdata = SectorDict(c => S) + end + dims[c] = length(S) + end + return Udata, Sdata, Vdata, dims +end + +function ChainRulesCore.rrule( + ::typeof(PEPSKit.tsvd!), + t::AbstractTensorMap, + alg::SVDAdjoint{IterSVD,R,B}; + trunc::TruncationScheme=notrunc(), + p::Real=2, +) where {R,B} + U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) + + function tsvd!_itersvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + Δt = similar(t) + for (c, b) in blocks(Δt) + Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) + ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) + Sdc = view(Sc, diagind(Sc)) + ΔSdc = ΔSc isa AbstractZero ?
ΔSc : view(ΔSc, diagind(ΔSc)) + + n_vals = length(Sdc) + lvecs = Vector{Vector{scalartype(t)}}(eachcol(Uc)) + rvecs = Vector{Vector{scalartype(t)}}(eachcol(Vc')) + minimal_info = KrylovKit.ConvergenceInfo(length(Sdc), nothing, nothing, -1, -1) # Just supply converged to SVD pullback + + if ΔUc isa AbstractZero && ΔVc isa AbstractZero # Handle ZeroTangent singular vectors + Δlvecs = fill(ZeroTangent(), n_vals) + Δrvecs = fill(ZeroTangent(), n_vals) + else + Δlvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔUc)) + Δrvecs = Vector{Vector{scalartype(t)}}(eachcol(ΔVc')) + end + + xs, ys = CRCExt.compute_svdsolve_pullback_data( + ΔSc isa AbstractZero ? fill(zero(Sc[1]), n_vals) : ΔSdc, + Δlvecs, + Δrvecs, + Sdc, + lvecs, + rvecs, + minimal_info, + block(t, c), + :LR, + alg.fwd_alg.alg, + alg.rrule_alg, + ) + copyto!( + b, + CRCExt.construct∂f_svd(HasReverseMode(), block(t, c), lvecs, rvecs, xs, ys), + ) + end + return NoTangent(), Δt, NoTangent() + end + function tsvd!_itersvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + return NoTangent(), ZeroTangent(), NoTangent() + end + + return (U, S, V, ϵ), tsvd!_itersvd_pullback +end + +""" + struct NonTruncSVDAdjoint + +Old SVD adjoint that does not account for the truncated part of truncated SVDs. +""" +struct NonTruncSVDAdjoint end + +# Use outdated adjoint in reverse pass (not taking truncated part into account for testing purposes) +function ChainRulesCore.rrule( + ::typeof(PEPSKit.tsvd!), + t::AbstractTensorMap, + alg::SVDAdjoint{F,NonTruncSVDAdjoint,B}; + trunc::TruncationScheme=notrunc(), + p::Real=2, +) where {F,B} + U, S, V, ϵ = PEPSKit.tsvd(t, alg; trunc, p) + + function tsvd!_nontruncsvd_pullback((ΔU, ΔS, ΔV, Δϵ)) + Δt = similar(t) + for (c, b) in blocks(Δt) + Uc, Sc, Vc = block(U, c), block(S, c), block(V, c) + ΔUc, ΔSc, ΔVc = block(ΔU, c), block(ΔS, c), block(ΔV, c) + copyto!( + b, oldsvd_rev(Uc, Sc, Vc, ΔUc, ΔSc, ΔVc; lorentz_broadening=alg.broadening) + ) + end + return NoTangent(), Δt, NoTangent() + end + function tsvd!_nontruncsvd_pullback(::Tuple{ZeroTangent,ZeroTangent,ZeroTangent}) + return NoTangent(), ZeroTangent(), NoTangent() + end + + return (U, S, V, ϵ), tsvd!_nontruncsvd_pullback +end + +function oldsvd_rev( + U::AbstractMatrix, + S::AbstractMatrix, + V::AbstractMatrix, + ΔU, + ΔS, + ΔV; + lorentz_broadening=0, + atol::Real=0, + rtol::Real=atol > 0 ? 0 : eps(scalartype(S))^(3 / 4), +) + tol = atol > 0 ? atol : rtol * S[1, 1] + F = _invert_S²(S, tol, lorentz_broadening) # Includes Lorentzian broadening + S⁻¹ = pinv(S; atol=tol) + + # dS contribution + term = ΔS isa ZeroTangent ? ΔS : Diagonal(diag(ΔS)) + + # dU₁ and dV₁ off-diagonal contribution + J = F .* (U' * ΔU) + term += (J + J') * S + VΔV = (V * ΔV') + K = F .* VΔV + term += S * (K + K') + + # dV₁ diagonal contribution (diagonal of dU₁ is gauged away) + if scalartype(U) <: Complex && !(ΔV isa ZeroTangent) && !(ΔU isa ZeroTangent) + L = Diagonal(diag(VΔV)) + term += 0.5 * S⁻¹ * (L' - L) + end + ΔA = U * term * V + + # Projector contribution for non-square A + UUd = U * U' + VdV = V' * V + Uproj = one(UUd) - UUd + Vproj = one(VdV) - VdV + ΔA += Uproj * ΔU * S⁻¹ * V + U * S⁻¹ * ΔV * Vproj # Wrong truncation contribution + + return ΔA +end + +# Computation of F in SVD adjoint, including Lorentzian broadening +function _invert_S²(S::AbstractMatrix{T}, tol::Real, ε=0) where {T<:Real} + F = similar(S) + @inbounds for i in axes(F, 1), j in axes(F, 2) + F[i, j] = if i == j + zero(T) + else + sᵢ, sⱼ = S[i, i], S[j, j] + Δs = abs(sⱼ - sᵢ) < tol ?
tol : sⱼ^2 - sᵢ^2 + ε > 0 && (Δs = _lorentz_broaden(Δs, ε)) + 1 / Δs + end + end + return F +end + +# Lorentzian broadening for SVD adjoint F-singularities +function _lorentz_broaden(x::Real, ε=1e-12) + x′ = 1 / x + return x′ / (x′^2 + ε) +end diff --git a/test/boundarymps/vumps.jl b/test/boundarymps/vumps.jl index bbf69006..feacbe0d 100644 --- a/test/boundarymps/vumps.jl +++ b/test/boundarymps/vumps.jl @@ -17,9 +17,7 @@ const vumps_alg = VUMPS(; alg_eigsolve=MPSKit.Defaults.alg_eigsolve(; ishermitia mps, envs, ϵ = leading_boundary(mps, T, vumps_alg) N = abs(sum(expectation_value(mps, T))) - ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true) - ) + ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1)) N´ = abs(norm(psi, ctm)) @test N ≈ N´ atol = 1e-3 @@ -33,12 +31,10 @@ end mps, envs, ϵ = leading_boundary(mps, T, vumps_alg) N = abs(prod(expectation_value(mps, T))) - ctm = leading_boundary( - CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1, fixedspace=true) - ) + ctm = leading_boundary(CTMRGEnv(psi; Venv=ComplexSpace(20)), psi, CTMRG(; verbosity=1)) N´ = abs(norm(psi, ctm)) - @test N ≈ N´ rtol = 1e-3 + @test N ≈ N´ rtol = 1e-2 end @testset "PEPO runthrough" begin diff --git a/test/ctmrg/gaugefix.jl b/test/ctmrg/gaugefix.jl index 45bee5d5..a6883474 100644 --- a/test/ctmrg/gaugefix.jl +++ b/test/ctmrg/gaugefix.jl @@ -2,6 +2,7 @@ using Test using Random using PEPSKit using TensorKit +using Accessors using PEPSKit: ctmrg_iter, gauge_fix, check_elementwise_convergence @@ -46,9 +47,9 @@ end verbosity = 1 alg = CTMRG(; - trscheme=truncdim(dim(ctm_space)), tol=1e-10, miniter=4, maxiter=400, verbosity + tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)) ) - alg_fixed = CTMRG(; trscheme=truncdim(dim(ctm_space)), verbosity, fixedspace=true) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) @@ -71,9 +72,9 @@ end verbosity = 1 alg = CTMRG(; - trscheme=truncspace(ctm_space), tol=1e-10, miniter=4, maxiter=400, verbosity + tol=1e-10, miniter=4, maxiter=400, verbosity, trscheme=truncdim(dim(ctm_space)) ) - alg_fixed = CTMRG(; trscheme=truncspace(ctm_space), verbosity, fixedspace=true) + alg_fixed = @set alg.projector_alg.trscheme = FixedSpaceTruncation() ctm = leading_boundary(ctm, psi, alg) ctm2, = ctmrg_iter(psi, ctm, alg_fixed) diff --git a/test/ctmrg/gradients.jl b/test/ctmrg/gradients.jl index 1c81063a..978d65e7 100644 --- a/test/ctmrg/gradients.jl +++ b/test/ctmrg/gradients.jl @@ -17,9 +17,7 @@ models = [square_lattice_heisenberg(), square_lattice_pwave()] names = ["Heisenberg", "p-wave superconductor"] Random.seed!(42039482030) tol = 1e-8 -boundary_alg = CTMRG(; - trscheme=truncdim(χenv), tol=tol, miniter=4, maxiter=100, fixedspace=true, verbosity=0 -) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0) gradmodes = [ nothing, GeomSum(; tol), ManualIter(; tol), KrylovKit.GMRES(; tol=tol, maxiter=10) ] diff --git a/test/ctmrg/gradparts.jl b/test/ctmrg/gradparts.jl index c21a2420..9c2e3442 100644 --- a/test/ctmrg/gradparts.jl +++ b/test/ctmrg/gradparts.jl @@ -30,9 +30,7 @@ Vspaces = [ComplexSpace(χbond), Vect[FermionParity](0 => χbond / 2, 1 => χbon Espaces = [ComplexSpace(χenv), Vect[FermionParity](0 => χenv / 2, 1 => χenv / 2)] functions = [left_move, ctmrg_iter, leading_boundary] tol = 1e-8 -boundary_alg = CTMRG(; - trscheme=truncdim(χenv), tol=tol, 
miniter=4, maxiter=100, fixedspace=true, verbosity=0 -) +boundary_alg = CTMRG(; tol=tol, miniter=4, maxiter=100, verbosity=0) ## Gauge invariant function of the environment # -------------------------------------------- @@ -51,10 +49,8 @@ end ## Tests # ------ -@testset "Reverse rules for composite parts of the CTMRG fixed point with spacetype $(Vspaces[i])" for i in - eachindex( - Pspaces -) +title = "Reverse rules for composite parts of the CTMRG fixed point with spacetype" +@testset title * "$(Vspaces[i])" for i in eachindex(Pspaces) psi = InfinitePEPS(Pspaces[i], Vspaces[i], Vspaces[i]) env = CTMRGEnv(psi; Venv=Espaces[i]) diff --git a/test/ctmrg/svd_wrapper.jl b/test/ctmrg/svd_wrapper.jl new file mode 100644 index 00000000..3c17dc7d --- /dev/null +++ b/test/ctmrg/svd_wrapper.jl @@ -0,0 +1,93 @@ +using Test +using Random +using LinearAlgebra +using TensorKit +using KrylovKit +using ChainRulesCore, Zygote +using Accessors +using PEPSKit + +# Gauge-invariant loss function +function lossfun(A, alg, R=TensorMap(randn, space(A)), trunc=notrunc()) + U, _, V, = PEPSKit.tsvd(A, alg; trunc) + return real(dot(R, U * V)) # Overlap with random tensor R is gauge-invariant and differentiable, also for m≠n +end + +m, n = 20, 30 +dtype = ComplexF64 +χ = 12 +trunc = truncspace(ℂ^χ) +# lorentz_broadening = 1e-12 +rtol = 1e-9 +r = TensorMap(randn, dtype, ℂ^m, ℂ^n) +R = TensorMap(randn, space(r)) + +full_alg = SVDAdjoint(; fwd_alg=TensorKit.SVD(), rrule_alg=nothing) +old_alg = SVDAdjoint(; + fwd_alg=TensorKit.SVD(), rrule_alg=NonTruncSVDAdjoint(), broadening=0.0 +) +iter_alg = SVDAdjoint(; fwd_alg=IterSVD(), rrule_alg=GMRES(; tol=1e-13)) # Don't make adjoint tolerance too small, g_itersvd will be weird + +@testset "Non-truncated SVD" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R), r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R), r) + + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test g_fullsvd[1] ≈ g_oldsvd[1] rtol = rtol + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol +end + +@testset "Truncated SVD with χ=$χ" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg, R, trunc), r) + l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, old_alg, R, trunc), r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, R, trunc), r) + + @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd + @test !isapprox(g_fullsvd[1], g_oldsvd[1]; rtol) + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol +end + +# TODO: Add when Lorentzian broadening is implemented +# @testset "Truncated SVD with χ=$χ and ε=$lorentz_broadening broadening" begin +# l_fullsvd, g_fullsvd = withgradient( +# A -> lossfun(A, FullSVD(; lorentz_broadening, R; trunc), r +# ) +# l_oldsvd, g_oldsvd = withgradient(A -> lossfun(A, OldSVD(; lorentz_broadening), R; trunc), r) +# l_itersvd, g_itersvd = withgradient( +# A -> lossfun(A, IterSVD(; howmany=χ, lorentz_broadening), R; trunc), r +# ) + +# @test l_oldsvd ≈ l_itersvd ≈ l_fullsvd +# @test norm(g_fullsvd[1] - g_oldsvd[1]) / norm(g_fullsvd[1]) > rtol +# @test norm(g_fullsvd[1] - g_itersvd[1]) / norm(g_fullsvd[1]) < rtol +# end + +symm_m, symm_n = 18, 24 +symm_space = Z2Space(0 => symm_m, 1 => symm_n) +symm_trspace = truncspace(Z2Space(0 => symm_m ÷ 2, 1 => symm_n ÷ 3)) +symm_r = TensorMap(randn, dtype, symm_space, symm_space) +symm_R = TensorMap(randn, dtype, space(symm_r)) + +@testset "IterSVD of symmetric tensors" begin + l_fullsvd, g_fullsvd = withgradient(A -> lossfun(A, full_alg,
symm_R), symm_r) + l_itersvd, g_itersvd = withgradient(A -> lossfun(A, iter_alg, symm_R), symm_r) + @test l_itersvd ≈ l_fullsvd + @test g_fullsvd[1] ≈ g_itersvd[1] rtol = rtol + + l_fullsvd_tr, g_fullsvd_tr = withgradient( + A -> lossfun(A, full_alg, symm_R, symm_trspace), symm_r + ) + l_itersvd_tr, g_itersvd_tr = withgradient( + A -> lossfun(A, iter_alg, symm_R, symm_trspace), symm_r + ) + @test l_itersvd_tr ≈ l_fullsvd_tr + @test g_fullsvd_tr[1] ≈ g_itersvd_tr[1] rtol = rtol + + iter_alg_fallback = @set iter_alg.fwd_alg.fallback_threshold = 0.4 # Do dense SVD in one block, sparse SVD in the other + l_itersvd_fb, g_itersvd_fb = withgradient( + A -> lossfun(A, iter_alg_fallback, symm_R, symm_trspace), symm_r + ) + @test l_itersvd_fb ≈ l_fullsvd_tr + @test g_fullsvd_tr[1] ≈ g_itersvd_fb[1] rtol = rtol +end diff --git a/test/heisenberg.jl b/test/heisenberg.jl index b72268ad..0be5774c 100644 --- a/test/heisenberg.jl +++ b/test/heisenberg.jl @@ -8,7 +8,7 @@ using OptimKit # initialize parameters χbond = 2 χenv = 16 -ctm_alg = CTMRG(; trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=100, verbosity=1) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(χenv)) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=100, gradtol=1e-3, verbosity=2), diff --git a/test/pwave.jl b/test/pwave.jl index 0c08c93b..fda8269e 100644 --- a/test/pwave.jl +++ b/test/pwave.jl @@ -10,13 +10,11 @@ unitcell = (2, 2) H = square_lattice_pwave(; unitcell) χbond = 2 χenv = 16 -ctm_alg = CTMRG(; - trscheme=truncdim(χenv), tol=1e-10, miniter=4, maxiter=50, fixedspace=true, verbosity=1 -) +ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1) opt_alg = PEPSOptimize(; boundary_alg=ctm_alg, optimizer=LBFGS(4; maxiter=10, gradtol=1e-3, verbosity=2), - gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50, verbosity=2), + gradient_alg=GMRES(; tol=1e-3, maxiter=2, krylovdim=50), reuse_env=true, verbosity=2, ) diff --git a/test/runtests.jl b/test/runtests.jl index 799726b8..e80e627c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,9 @@ end @time @safetestset "Gradients" begin include("ctmrg/gradients.jl") end + @time @safetestset "SVD wrapper" begin + include("ctmrg/svd_wrapper.jl") + end end if GROUP == "ALL" || GROUP == "MPS" @time @safetestset "VUMPS" begin
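To summarize how the reworked keyword interface composes, here is a hedged sketch; the bond dimension and tolerances are illustrative values only, and the calls mirror `test/heisenberg.jl`, `test/ctmrg/gaugefix.jl`, and `test/ctmrg/svd_wrapper.jl` above:

```julia
using TensorKit, KrylovKit, PEPSKit
using Accessors  # provides @set

# Dense forward SVD with a dimension-based projector truncation
ctm_alg = CTMRG(; tol=1e-10, miniter=4, maxiter=100, verbosity=1, trscheme=truncdim(16))

# Nested algorithm fields can be updated without rebuilding the struct,
# e.g. to keep the environment spaces fixed as in the gauge-fixing step
ctm_alg_fixed = @set ctm_alg.projector_alg.trscheme = FixedSpaceTruncation()

# The iterative SVD with a GMRES-based adjoint is selected via `svd_alg`; the default
# FixedSpaceTruncation is kept, since IterSVD targets singular values via a truncation space
ctm_alg_iter = CTMRG(;
    tol=1e-10, svd_alg=SVDAdjoint(; fwd_alg=IterSVD(), rrule_alg=GMRES(; tol=1e-13))
)
```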