Commit

add stochastic reconfiguration

kevinsung committed Apr 26, 2024
1 parent 4797e0d commit 35f309e
Showing 6 changed files with 430 additions and 41 deletions.
python/ffsim/optimize/__init__.py: 4 changes (4 additions & 0 deletions)

@@ -11,7 +11,11 @@
"""Optimization algorithms."""

from ffsim.optimize.linear_method import minimize_linear_method
from ffsim.optimize.stochastic_reconfiguration import (
minimize_stochastic_reconfiguration,
)

__all__ = [
"minimize_linear_method",
"minimize_stochastic_reconfiguration",
]
python/ffsim/optimize/_util.py: 55 changes (55 additions & 0 deletions)

@@ -0,0 +1,55 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

from __future__ import annotations

from typing import Callable

import numpy as np
from scipy.optimize import OptimizeResult
from scipy.sparse.linalg import LinearOperator


class WrappedCallable:
    """Callable wrapper used to count function calls."""

    def __init__(
        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
    ):
        self.func = func
        self.optimize_result = optimize_result

    def __call__(self, x: np.ndarray) -> np.ndarray:
        self.optimize_result.nfev += 1
        return self.func(x)


class WrappedLinearOperator:
    """LinearOperator wrapper used to count LinearOperator applications."""

    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
        self.linop = linop
        self.optimize_result = optimize_result

    def __matmul__(self, other: np.ndarray):
        if len(other.shape) == 1:
            self.optimize_result.nlinop += 1
        else:
            # A matrix with n columns counts as n operator applications.
            _, n = other.shape
            self.optimize_result.nlinop += n
        return self.linop @ other

    def __rmatmul__(self, other: np.ndarray):
        if len(other.shape) == 1:
            self.optimize_result.nlinop += 1
        else:
            n, _ = other.shape
            self.optimize_result.nlinop += n
        return other @ self.linop
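
For orientation, a minimal sketch of how these wrappers are used (this example is not part of the commit; the toy normalizing function and identity operator are illustrative assumptions). Both classes mutate the shared OptimizeResult in place, so evaluation counts accrue as a side effect of ordinary calls:

import numpy as np
from scipy.optimize import OptimizeResult
from scipy.sparse.linalg import aslinearoperator

from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator

result = OptimizeResult(nfev=0, nlinop=0)

# Hypothetical ansatz: normalize the parameter vector.
func = WrappedCallable(lambda x: x / np.linalg.norm(x), result)
linop = WrappedLinearOperator(aslinearoperator(np.eye(2)), result)

vec = func(np.array([3.0, 4.0]))  # increments result.nfev
_ = linop @ vec  # a single vector counts as one application
print(result.nfev, result.nlinop)  # 1 1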
python/ffsim/optimize/linear_method.py: 43 changes (3 additions & 40 deletions)

@@ -20,47 +20,10 @@
 from scipy.optimize import OptimizeResult, minimize
 from scipy.sparse.linalg import LinearOperator
 
+from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator
 from ffsim.states import one_hot
 
 
-class _WrappedCallable:
-    """Callable wrapper used to count function calls."""
-
-    def __init__(
-        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
-    ):
-        self.func = func
-        self.optimize_result = optimize_result
-
-    def __call__(self, x: np.ndarray) -> np.ndarray:
-        self.optimize_result.nfev += 1
-        return self.func(x)
-
-
-class _WrappedLinearOperator:
-    """LinearOperator wrapper used to count LinearOperator applications."""
-
-    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
-        self.linop = linop
-        self.optimize_result = optimize_result
-
-    def __matmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            _, n = other.shape
-            self.optimize_result.nlinop += n
-        return self.linop @ other
-
-    def __rmatmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            n, _ = other.shape
-            self.optimize_result.nlinop += n
-        return other @ self.linop
-
-
 def minimize_linear_method(
     params_to_vec: Callable[[np.ndarray], np.ndarray],
     hamiltonian: LinearOperator,
@@ -165,8 +128,8 @@ def minimize_linear_method(
         x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
     )
 
-    params_to_vec = _WrappedCallable(params_to_vec, intermediate_result)
-    hamiltonian = _WrappedLinearOperator(hamiltonian, intermediate_result)
+    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
+    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)
 
     for i in range(maxiter):
         vec = params_to_vec(params)
python/ffsim/optimize/stochastic_reconfiguration.py: 249 changes (249 additions & 0 deletions)

@@ -0,0 +1,249 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Stochastic reconfiguration for optimization of a variational ansatz."""

from __future__ import annotations

import math
from typing import Any, Callable

import numpy as np
import scipy.linalg
from scipy.optimize import OptimizeResult, minimize
from scipy.sparse.linalg import LinearOperator

from ffsim.optimize._util import WrappedCallable, WrappedLinearOperator
from ffsim.states import one_hot


def minimize_stochastic_reconfiguration(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    hamiltonian: LinearOperator,
    x0: np.ndarray,
    *,
    maxiter: int = 1000,
    variation: float = 1.0,
    cond: float | None = None,
    epsilon: float = 1e-8,
    gtol: float = 1e-5,
    optimize_hyperparameters: bool = True,
    optimize_hyperparameters_args: dict | None = None,
    callback: Callable[[OptimizeResult], Any] | None = None,
) -> OptimizeResult:
"""Minimize the energy of a variational ansatz using stochastic reconfiguration.
References:
- `Generalized Lanczos algorithm for variational quantum Monte Carlo`_
Args:
params_to_vec: Function representing the wavefunction ansatz. It takes as input
a vector of real-valued parameters and outputs the state vector represented
by those parameters.
hamiltonian: The Hamiltonian representing the energy to be minimized.
x0: Initial guess for the parameters.
maxiter: Maximum number of optimization iterations to perform.
variation: Hyperparameter controlling the size of parameter variations
used in the linear expansion of the wavefunction. Its value must be
strictly between 0 and 1. A larger value results in larger variations.
cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
solve for the parameter update.
epsilon: Increment to use for approximating the gradient using
finite difference.
gtol: Convergence threshold for the norm of the projected gradient.
optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
each iteration. Optimizing the hyperparameter incurs more function and
energy evaluations in each iteration, but may speed up convergence, leading
to fewer iterations overall. The optimization is performed using
`scipy.optimize.minimize`_.
optimize_hyperparameters_args: Arguments to use when calling
`scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
constructed as
.. code::
scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args)
If not specified, the default value of `dict(method="L-BFGS-B")` will be
used.
callback: A callable called after each iteration. It is called with the
signature
.. code::
callback(intermediate_result: OptimizeResult)
where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
with attributes ``x`` and ``fun``, the present values of the parameter
vector and objective function. For all iterations except for the last,
it also contains the ``jac`` attribute holding the present value of the
gradient, as well as ``regularization`` and ``variation`` attributes
holding the present values of the `regularization` and `variation`
hyperparameters.
Returns:
The optimization result represented as a `scipy.optimize.OptimizeResult`_
object. Note the following definitions of selected attributes:
- ``x``: The final parameters of the optimization.
- ``fun``: The energy associated with the final parameters. That is, the
expectation value of the Hamiltonian with respect to the state vector
corresponding to the parameters.
- ``nfev``: The number of times the ``params_to_vec`` function was called.
- ``nlinop``: The number of times the ``hamiltonian`` linear operator was
applied to a vector.
.. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
.. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
.. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
.. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
""" # noqa: E501
    if variation <= 0:
        raise ValueError(f"variation must be positive. Got {variation}.")
    if maxiter < 1:
        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

    if optimize_hyperparameters_args is None:
        optimize_hyperparameters_args = dict(method="L-BFGS-B")

    # Optimize over the square root of `variation`; squaring it keeps it positive.
    variation_param = math.sqrt(variation)
    params = x0.copy()
    converged = False
    intermediate_result = OptimizeResult(
        x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
    )

    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)

    for i in range(maxiter):
        vec = params_to_vec(params)
        # Finite-difference Jacobian of the state vector with respect to the
        # parameters, with the component along the state projected out.
        jac = _jac(params_to_vec, params, vec, epsilon=epsilon)

        energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian)
        intermediate_result.njev += 1

        if i > 0 and callback is not None:
            intermediate_result.x = params
            intermediate_result.fun = energy
            intermediate_result.jac = grad
            intermediate_result.overlap_mat = overlap_mat
            intermediate_result.variation = variation
            callback(intermediate_result)

        if np.linalg.norm(grad) < gtol:
            converged = True
            break

        if optimize_hyperparameters:

            def f(x: np.ndarray) -> float:
                # Energy after a stochastic reconfiguration step, as a function
                # of the square-root parameterization of `variation`.
                (variation_param,) = x
                variation = variation_param**2
                param_update = _get_param_update(
                    grad,
                    overlap_mat,
                    variation,
                    cond=cond,
                )
                vec = params_to_vec(params + param_update)
                return np.vdot(vec, hamiltonian @ vec).real

            result = minimize(
                f,
                x0=[variation_param],
                **optimize_hyperparameters_args,
            )
            (variation_param,) = result.x
            variation = variation_param**2

        param_update = _get_param_update(
            grad,
            overlap_mat,
            variation,
            cond=cond,
        )
        params = params + param_update
        intermediate_result.nit += 1

    vec = params_to_vec(params)
    energy = np.vdot(vec, hamiltonian @ vec).real

    intermediate_result.x = params
    intermediate_result.fun = energy
    del intermediate_result.jac
    if callback is not None:
        callback(intermediate_result)

    if converged:
        success = True
        message = "Convergence: Norm of projected gradient <= gtol."
    else:
        success = False
        message = "Stop: Total number of iterations reached limit."

    return OptimizeResult(
        x=params,
        success=success,
        message=message,
        fun=energy,
        jac=grad,
        nfev=intermediate_result.nfev,
        njev=intermediate_result.njev,
        nlinop=intermediate_result.nlinop,
        nit=intermediate_result.nit,
    )


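# Note on the update rule implemented by the helpers below (a reading of the
# code in terms of the reference above, not stated in the commit itself):
# writing the energy as E(theta) = <psi(theta)|H|psi(theta)>, its gradient as
# g, and the overlap matrix as S_ij = Re <d_i psi|d_j psi>, each iteration
# solves
#
#     S @ dtheta = -(variation / 2) * g
#
# in the least-squares sense and updates theta <- theta + dtheta.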
def _sr_matrices(
    vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    # Energy <psi|H|psi>, energy gradient 2 Re <psi|H|d_i psi>, and overlap
    # matrix Re <d_i psi|d_j psi> (the Jacobian columns already have the
    # component along |psi> projected out).
    energy = np.vdot(vec, hamiltonian @ vec)
    grad = vec.conj() @ (hamiltonian @ jac)
    overlap_mat = jac.T.conj() @ jac
    return energy.real, 2 * grad.real, overlap_mat.real


def _jac(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    theta: np.ndarray,
    vec: np.ndarray,
    epsilon: float,
) -> np.ndarray:
    jac = np.zeros((len(vec), len(theta)), dtype=complex)
    for i in range(len(theta)):
        grad = _grad(params_to_vec, theta, i, epsilon)
        # Project out the component along the current state vector.
        grad -= np.vdot(vec, grad) * vec
        jac[:, i] = grad
    return jac


def _grad(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    theta: np.ndarray,
    index: int,
    epsilon: float,
) -> np.ndarray:
    # Central finite-difference approximation of the derivative of the state
    # vector with respect to parameter `index`.
    unit = one_hot(len(theta), index, dtype=float)
    plus = theta + epsilon * unit
    minus = theta - epsilon * unit
    return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon)


def _get_param_update(
    grad: np.ndarray,
    overlap_mat: np.ndarray,
    variation: float,
    cond: float | None,
) -> np.ndarray:
    # Solve the stochastic reconfiguration linear system in the least-squares
    # sense, since the overlap matrix may be singular.
    x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond)
    return x
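
To make the new entry point concrete, a minimal usage sketch (not from this commit; the two-parameter Bloch-sphere ansatz and the Pauli-Z Hamiltonian are illustrative assumptions — any `params_to_vec` producing a normalized state vector works):

import numpy as np
from scipy.sparse.linalg import aslinearoperator

import ffsim


def params_to_vec(params: np.ndarray) -> np.ndarray:
    # Hypothetical ansatz: a point on the Bloch sphere.
    theta, phi = params
    return np.array([np.cos(theta / 2), np.exp(1j * phi) * np.sin(theta / 2)])


# Illustrative Hamiltonian: Pauli Z, whose ground-state energy is -1.
hamiltonian = aslinearoperator(np.diag([1.0, -1.0]))

result = ffsim.optimize.minimize_stochastic_reconfiguration(
    params_to_vec,
    hamiltonian,
    x0=np.array([0.8, 0.1]),
    maxiter=100,
)
print(result.fun)  # expected to approach -1.0

Since the energy here is independent of phi, the overlap matrix is singular in that direction; the least-squares solve (and the `cond` argument) is what keeps the update well defined in such cases.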
tests/python/optimize/linear_method_test.py: 2 changes (1 addition & 1 deletion)

@@ -81,7 +81,7 @@ def callback(intermediate_result):
     assert result.nit <= 7
     assert result.nit < result.nlinop < result.nfev
 
-    # optimization with optimizing hyperparameters
+    # optimization without optimizing hyperparameters
     info = defaultdict(list)
     result = ffsim.optimize.minimize_linear_method(
         params_to_vec,