Commit

add stochastic reconfiguration

kevinsung committed Apr 26, 2024
1 parent 4797e0d commit 35f309e
Showing 6 changed files with 430 additions and 41 deletions.
python/ffsim/optimize/__init__.py: 4 changes (4 additions & 0 deletions)

@@ -11,7 +11,11 @@
"""Optimization algorithms."""

from ffsim.optimize.linear_method import minimize_linear_method
from ffsim.optimize.stochastic_reconfiguration import (
minimize_stochastic_reconfiguration,
)

__all__ = [
"minimize_linear_method",
"minimize_stochastic_reconfiguration",
]
python/ffsim/optimize/_util.py: 55 changes (55 additions & 0 deletions)

@@ -0,0 +1,55 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

from __future__ import annotations

from typing import Callable

import numpy as np
from scipy.optimize import OptimizeResult
from scipy.sparse.linalg import LinearOperator


class WrappedCallable:
    """Callable wrapper used to count function calls."""

    def __init__(
        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
    ):
        self.func = func
        self.optimize_result = optimize_result

    def __call__(self, x: np.ndarray) -> np.ndarray:
        self.optimize_result.nfev += 1
        return self.func(x)


class WrappedLinearOperator:
    """LinearOperator wrapper used to count LinearOperator applications."""

    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
        self.linop = linop
        self.optimize_result = optimize_result

    def __matmul__(self, other: np.ndarray):
        if len(other.shape) == 1:
            self.optimize_result.nlinop += 1
        else:
            # A matrix with n columns counts as n operator applications.
            _, n = other.shape
            self.optimize_result.nlinop += n
        return self.linop @ other

    def __rmatmul__(self, other: np.ndarray):
        if len(other.shape) == 1:
            self.optimize_result.nlinop += 1
        else:
            n, _ = other.shape
            self.optimize_result.nlinop += n
        return other @ self.linop
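
For orientation, a minimal sketch of how these wrappers are used (this example is not part of the commit; the toy normalizing function and identity operator are illustrative assumptions). Both classes mutate the shared OptimizeResult in place, so evaluation counts accrue as a side effect of ordinary calls:

import numpy as np
from scipy.optimize import OptimizeResult
from scipy.sparse.linalg import aslinearoperator

from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator

result = OptimizeResult(nfev=0, nlinop=0)

# Hypothetical ansatz: normalize the parameter vector.
func = WrappedCallable(lambda x: x / np.linalg.norm(x), result)
linop = WrappedLinearOperator(aslinearoperator(np.eye(2)), result)

vec = func(np.array([3.0, 4.0]))  # increments result.nfev
_ = linop @ vec  # a single vector counts as one application
print(result.nfev, result.nlinop)  # 1 1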
python/ffsim/optimize/linear_method.py: 43 changes (3 additions & 40 deletions)

@@ -20,47 +20,10 @@
 from scipy.optimize import OptimizeResult, minimize
 from scipy.sparse.linalg import LinearOperator
 
+from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator
 from ffsim.states import one_hot
 
 
-class _WrappedCallable:
-    """Callable wrapper used to count function calls."""
-
-    def __init__(
-        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
-    ):
-        self.func = func
-        self.optimize_result = optimize_result
-
-    def __call__(self, x: np.ndarray) -> np.ndarray:
-        self.optimize_result.nfev += 1
-        return self.func(x)
-
-
-class _WrappedLinearOperator:
-    """LinearOperator wrapper used to count LinearOperator applications."""
-
-    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
-        self.linop = linop
-        self.optimize_result = optimize_result
-
-    def __matmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            _, n = other.shape
-            self.optimize_result.nlinop += n
-        return self.linop @ other
-
-    def __rmatmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            n, _ = other.shape
-            self.optimize_result.nlinop += n
-        return other @ self.linop
-
-
 def minimize_linear_method(
     params_to_vec: Callable[[np.ndarray], np.ndarray],
     hamiltonian: LinearOperator,
@@ -165,8 +128,8 @@ def minimize_linear_method(
         x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
     )
 
-    params_to_vec = _WrappedCallable(params_to_vec, intermediate_result)
-    hamiltonian = _WrappedLinearOperator(hamiltonian, intermediate_result)
+    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
+    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)
 
     for i in range(maxiter):
         vec = params_to_vec(params)
python/ffsim/optimize/stochastic_reconfiguration.py: 249 changes (249 additions & 0 deletions)

@@ -0,0 +1,249 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Stochastic reconfiguration for optimization of a variational ansatz."""

from __future__ import annotations

import math
from typing import Any, Callable

import numpy as np
import scipy.linalg
from scipy.optimize import OptimizeResult, minimize
from scipy.sparse.linalg import LinearOperator

from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator
from ffsim.states import one_hot


def minimize_stochastic_reconfiguration(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    hamiltonian: LinearOperator,
    x0: np.ndarray,
    *,
    maxiter: int = 1000,
    variation: float = 1.0,
    cond: float | None = None,
    epsilon: float = 1e-8,
    gtol: float = 1e-5,
    optimize_hyperparameters: bool = True,
    optimize_hyperparameters_args: dict | None = None,
    callback: Callable[[OptimizeResult], Any] | None = None,
) -> OptimizeResult:
"""Minimize the energy of a variational ansatz using stochastic reconfiguration.
References:
- `Generalized Lanczos algorithm for variational quantum Monte Carlo`_
Args:
params_to_vec: Function representing the wavefunction ansatz. It takes as input
a vector of real-valued parameters and outputs the state vector represented
by those parameters.
hamiltonian: The Hamiltonian representing the energy to be minimized.
x0: Initial guess for the parameters.
maxiter: Maximum number of optimization iterations to perform.
variation: Hyperparameter controlling the size of parameter variations
used in the linear expansion of the wavefunction. Its value must be
strictly between 0 and 1. A larger value results in larger variations.
cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
solve for the parameter update.
epsilon: Increment to use for approximating the gradient using
finite difference.
gtol: Convergence threshold for the norm of the projected gradient.
optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
each iteration. Optimizing the hyperparameter incurs more function and
energy evaluations in each iteration, but may speed up convergence, leading
to fewer iterations overall. The optimization is performed using
`scipy.optimize.minimize`_.
optimize_hyperparameters_args: Arguments to use when calling
`scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
constructed as
.. code::
scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args)
If not specified, the default value of `dict(method="L-BFGS-B")` will be
used.
callback: A callable called after each iteration. It is called with the
signature
.. code::
callback(intermediate_result: OptimizeResult)
where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
with attributes ``x`` and ``fun``, the present values of the parameter
vector and objective function. For all iterations except for the last,
it also contains the ``jac`` attribute holding the present value of the
gradient, as well as ``regularization`` and ``variation`` attributes
holding the present values of the `regularization` and `variation`
hyperparameters.
Returns:
The optimization result represented as a `scipy.optimize.OptimizeResult`_
object. Note the following definitions of selected attributes:
- ``x``: The final parameters of the optimization.
- ``fun``: The energy associated with the final parameters. That is, the
expectation value of the Hamiltonian with respect to the state vector
corresponding to the parameters.
- ``nfev``: The number of times the ``params_to_vec`` function was called.
- ``nlinop``: The number of times the ``hamiltonian`` linear operator was
applied to a vector.
.. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
.. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
.. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
.. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
""" # noqa: E501
    if variation <= 0:
        raise ValueError(f"variation must be positive. Got {variation}.")
    if maxiter < 1:
        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

    if optimize_hyperparameters_args is None:
        optimize_hyperparameters_args = dict(method="L-BFGS-B")

    # Optimize over the square root of `variation`; squaring it keeps it positive.
    variation_param = math.sqrt(variation)
    params = x0.copy()
    converged = False
    intermediate_result = OptimizeResult(
        x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
    )

    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)

    for i in range(maxiter):
        vec = params_to_vec(params)
        # Finite-difference Jacobian of the state vector with respect to the
        # parameters, with the component along the state projected out.
        jac = _jac(params_to_vec, params, vec, epsilon=epsilon)

        energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian)
        intermediate_result.njev += 1

        if i > 0 and callback is not None:
            intermediate_result.x = params
            intermediate_result.fun = energy
            intermediate_result.jac = grad
            intermediate_result.overlap_mat = overlap_mat
            intermediate_result.variation = variation
            callback(intermediate_result)

        if np.linalg.norm(grad) < gtol:
            converged = True
            break

        if optimize_hyperparameters:

            def f(x: np.ndarray) -> float:
                # Energy after a stochastic reconfiguration step, as a function
                # of the square-root parameterization of `variation`.
                (variation_param,) = x
                variation = variation_param**2
                param_update = _get_param_update(
                    grad,
                    overlap_mat,
                    variation,
                    cond=cond,
                )
                vec = params_to_vec(params + param_update)
                return np.vdot(vec, hamiltonian @ vec).real

            result = minimize(
                f,
                x0=[variation_param],
                **optimize_hyperparameters_args,
            )
            (variation_param,) = result.x
            variation = variation_param**2

        param_update = _get_param_update(
            grad,
            overlap_mat,
            variation,
            cond=cond,
        )
        params = params + param_update
        intermediate_result.nit += 1

    vec = params_to_vec(params)
    energy = np.vdot(vec, hamiltonian @ vec).real

    intermediate_result.x = params
    intermediate_result.fun = energy
    del intermediate_result.jac
    if callback is not None:
        callback(intermediate_result)

    if converged:
        success = True
        message = "Convergence: Norm of projected gradient <= gtol."
    else:
        success = False
        message = "Stop: Total number of iterations reached limit."

    return OptimizeResult(
        x=params,
        success=success,
        message=message,
        fun=energy,
        jac=grad,
        nfev=intermediate_result.nfev,
        njev=intermediate_result.njev,
        nlinop=intermediate_result.nlinop,
        nit=intermediate_result.nit,
    )


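# Note on the update rule implemented by the helpers below (a reading of the
# code in terms of the reference above, not stated in the commit itself):
# writing the energy as E(theta) = <psi(theta)|H|psi(theta)>, its gradient as
# g, and the overlap matrix as S_ij = Re <d_i psi|d_j psi>, each iteration
# solves
#
#     S @ dtheta = -(variation / 2) * g
#
# in the least-squares sense and updates theta <- theta + dtheta.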
def _sr_matrices(
    vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    # Energy <psi|H|psi>, energy gradient 2 Re <psi|H|d_i psi>, and overlap
    # matrix Re <d_i psi|d_j psi> (the Jacobian columns already have the
    # component along |psi> projected out).
    energy = np.vdot(vec, hamiltonian @ vec)
    grad = vec.conj() @ (hamiltonian @ jac)
    overlap_mat = jac.T.conj() @ jac
    return energy.real, 2 * grad.real, overlap_mat.real


def _jac(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    theta: np.ndarray,
    vec: np.ndarray,
    epsilon: float,
) -> np.ndarray:
    jac = np.zeros((len(vec), len(theta)), dtype=complex)
    for i in range(len(theta)):
        grad = _grad(params_to_vec, theta, i, epsilon)
        # Project out the component along the current state vector.
        grad -= np.vdot(vec, grad) * vec
        jac[:, i] = grad
    return jac


def _grad(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    theta: np.ndarray,
    index: int,
    epsilon: float,
) -> np.ndarray:
    # Central finite-difference approximation of the derivative of the state
    # vector with respect to parameter `index`.
    unit = one_hot(len(theta), index, dtype=float)
    plus = theta + epsilon * unit
    minus = theta - epsilon * unit
    return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon)


def _get_param_update(
    grad: np.ndarray,
    overlap_mat: np.ndarray,
    variation: float,
    cond: float | None,
) -> np.ndarray:
    # Solve the stochastic reconfiguration linear system in the least-squares
    # sense, since the overlap matrix may be singular.
    x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond)
    return x
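
To make the new entry point concrete, a minimal usage sketch (not from this commit; the two-parameter Bloch-sphere ansatz and the Pauli-Z Hamiltonian are illustrative assumptions — any `params_to_vec` producing a normalized state vector works):

import numpy as np
from scipy.sparse.linalg import aslinearoperator

import ffsim


def params_to_vec(params: np.ndarray) -> np.ndarray:
    # Hypothetical ansatz: a point on the Bloch sphere.
    theta, phi = params
    return np.array([np.cos(theta / 2), np.exp(1j * phi) * np.sin(theta / 2)])


# Illustrative Hamiltonian: Pauli Z, whose ground-state energy is -1.
hamiltonian = aslinearoperator(np.diag([1.0, -1.0]))

result = ffsim.optimize.minimize_stochastic_reconfiguration(
    params_to_vec,
    hamiltonian,
    x0=np.array([0.8, 0.1]),
    maxiter=100,
)
print(result.fun)  # expected to approach -1.0

Since the energy here is independent of phi, the overlap matrix is singular in that direction; the least-squares solve (and the `cond` argument) is what keeps the update well defined in such cases.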
tests/python/optimize/linear_method_test.py: 2 changes (1 addition & 1 deletion)

@@ -81,7 +81,7 @@ def callback(intermediate_result):
     assert result.nit <= 7
     assert result.nit < result.nlinop < result.nfev
 
-    # optimization with optimizing hyperparameters
+    # optimization without optimizing hyperparameters
     info = defaultdict(list)
     result = ffsim.optimize.minimize_linear_method(
         params_to_vec,