From f6638c78e045834f7cb3b592f3a44a378bb60983 Mon Sep 17 00:00:00 2001 From: "Kevin J. Sung" Date: Fri, 26 Apr 2024 15:56:56 -0400 Subject: [PATCH] add stochastic reconfiguration --- python/ffsim/optimize/__init__.py | 4 + python/ffsim/optimize/_util.py | 127 ++++++++++ python/ffsim/optimize/linear_method.py | 78 +----- .../optimize/stochastic_reconfiguration.py | 228 ++++++++++++++++++ tests/python/optimize/linear_method_test.py | 2 +- .../stochastic_reconfiguration_test.py | 118 +++++++++ 6 files changed, 488 insertions(+), 69 deletions(-) create mode 100644 python/ffsim/optimize/_util.py create mode 100644 python/ffsim/optimize/stochastic_reconfiguration.py create mode 100644 tests/python/optimize/stochastic_reconfiguration_test.py diff --git a/python/ffsim/optimize/__init__.py b/python/ffsim/optimize/__init__.py index 594e4191a..8cb5b45c5 100644 --- a/python/ffsim/optimize/__init__.py +++ b/python/ffsim/optimize/__init__.py @@ -11,7 +11,11 @@ """Optimization algorithms.""" from ffsim.optimize.linear_method import minimize_linear_method +from ffsim.optimize.stochastic_reconfiguration import ( + minimize_stochastic_reconfiguration, +) __all__ = [ "minimize_linear_method", + "minimize_stochastic_reconfiguration", ] diff --git a/python/ffsim/optimize/_util.py b/python/ffsim/optimize/_util.py new file mode 100644 index 000000000..36fa6983b --- /dev/null +++ b/python/ffsim/optimize/_util.py @@ -0,0 +1,127 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +from __future__ import annotations + +from typing import Callable + +import numpy as np +from scipy.optimize import OptimizeResult +from scipy.sparse.linalg import LinearOperator + +from ffsim import states + + +class WrappedCallable: + """Callable wrapper used to count function calls.""" + + def __init__( + self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult + ): + self.func = func + self.optimize_result = optimize_result + + def __call__(self, x: np.ndarray) -> np.ndarray: + self.optimize_result.nfev += 1 + return self.func(x) + + +class WrappedLinearOperator: + """LinearOperator wrapper used to count LinearOperator applications.""" + + def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult): + self.linop = linop + self.optimize_result = optimize_result + + def __matmul__(self, other: np.ndarray): + if len(other.shape) == 1: + self.optimize_result.nlinop += 1 + else: + _, n = other.shape + self.optimize_result.nlinop += n + return self.linop @ other + + def __rmatmul__(self, other: np.ndarray): + if len(other.shape) == 1: + self.optimize_result.nlinop += 1 + else: + n, _ = other.shape + self.optimize_result.nlinop += n + return other @ self.linop + + +def gradient_finite_diff( + params_to_vec: Callable[[np.ndarray], np.ndarray], + theta: np.ndarray, + index: int, + epsilon: float, +) -> np.ndarray: + """Return the gradient of one of the components of a function. + + Given a function that maps a vector of "parameters" to an output vector, return + the gradient of one of the parameter components. + + Args: + params_to_vec: Function that maps a parameter vector to an output vector. 
+ theta: The parameters at which to evaluate the gradient. + index: The index of the parameter to take the gradient of. + epsilon: Finite difference step size. + + Returns: + The gradient of the desired parameter component. + """ + unit = states.one_hot(len(theta), index, dtype=float) + plus = theta + epsilon * unit + minus = theta - epsilon * unit + return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon) + + +def jacobian_finite_diff( + params_to_vec: Callable[[np.ndarray], np.ndarray], + theta: np.ndarray, + dim: int, + epsilon: float, +) -> np.ndarray: + """Return the Jacobian matrix of a function. + + Given a function that maps a vector of "parameters" to an output vector, return + the matrix whose :math:$i$-th column contains the gradient of the + :math:$i$-th component of the function. + + Args: + params_to_vec: Function that maps a parameter vector to an output vector. + theta: The parameters at which to evaluate the Jacobian. + dim: The dimension of an output vector of the function. + epsilon: Finite difference step size. + + Returns: + The Jacobian matrix. + """ + jac = np.zeros((dim, len(theta)), dtype=complex) + for i in range(len(theta)): + jac[:, i] = gradient_finite_diff(params_to_vec, theta, i, epsilon) + return jac + + +def orthogonalize_columns(mat: np.ndarray, vec: np.ndarray) -> np.ndarray: + """Orthogonalize the columns of a matrix with respect to a vector. + + Given a matrix and a vector, return a new matrix whose columns contain the + components of the old columns orthogonal to the vector. + + Args: + mat: The matrix. + vec: The vector. + + Returns: + The new matrix with columns orthogonal to the vector. + """ + coeffs = vec.T.conj() @ mat + return mat - vec.reshape((-1, 1)) * coeffs.reshape((1, -1)) diff --git a/python/ffsim/optimize/linear_method.py b/python/ffsim/optimize/linear_method.py index e8f47a6f9..d4442e0b7 100644 --- a/python/ffsim/optimize/linear_method.py +++ b/python/ffsim/optimize/linear_method.py @@ -20,45 +20,12 @@ from scipy.optimize import OptimizeResult, minimize from scipy.sparse.linalg import LinearOperator -from ffsim.states import one_hot - - -class _WrappedCallable: - """Callable wrapper used to count function calls.""" - - def __init__( - self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult - ): - self.func = func - self.optimize_result = optimize_result - - def __call__(self, x: np.ndarray) -> np.ndarray: - self.optimize_result.nfev += 1 - return self.func(x) - - -class _WrappedLinearOperator: - """LinearOperator wrapper used to count LinearOperator applications.""" - - def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult): - self.linop = linop - self.optimize_result = optimize_result - - def __matmul__(self, other: np.ndarray): - if len(other.shape) == 1: - self.optimize_result.nlinop += 1 - else: - _, n = other.shape - self.optimize_result.nlinop += n - return self.linop @ other - - def __rmatmul__(self, other: np.ndarray): - if len(other.shape) == 1: - self.optimize_result.nlinop += 1 - else: - n, _ = other.shape - self.optimize_result.nlinop += n - return other @ self.linop +from ffsim.optimize._util import ( + WrappedCallable, + WrappedLinearOperator, + jacobian_finite_diff, + orthogonalize_columns, +) def minimize_linear_method( @@ -165,12 +132,13 @@ def minimize_linear_method( x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0 ) - params_to_vec = _WrappedCallable(params_to_vec, intermediate_result) - hamiltonian = _WrappedLinearOperator(hamiltonian, 
intermediate_result) + params_to_vec = WrappedCallable(params_to_vec, intermediate_result) + hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result) for i in range(maxiter): vec = params_to_vec(params) - jac = _jac(params_to_vec, params, vec, epsilon=epsilon) + jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon) + jac = orthogonalize_columns(jac, vec) energy_mat, overlap_mat = _linear_method_matrices(vec, jac, hamiltonian) energy = energy_mat[0, 0] @@ -293,32 +261,6 @@ def _solve_linear_method_eigensystem( return eig, vec -def _jac( - params_to_vec: Callable[[np.ndarray], np.ndarray], - theta: np.ndarray, - vec: np.ndarray, - epsilon: float, -) -> np.ndarray: - jac = np.zeros((len(vec), len(theta)), dtype=complex) - for i in range(len(theta)): - grad = _grad(params_to_vec, theta, i, epsilon) - grad -= np.vdot(vec, grad) * vec - jac[:, i] = grad - return jac - - -def _grad( - params_to_vec: Callable[[np.ndarray], np.ndarray], - theta: np.ndarray, - index: int, - epsilon: float, -) -> np.ndarray: - unit = one_hot(len(theta), index, dtype=float) - plus = theta + epsilon * unit - minus = theta - epsilon * unit - return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon) - - def _get_param_update( energy_mat: np.ndarray, overlap_mat: np.ndarray, diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py new file mode 100644 index 000000000..6c48e5b95 --- /dev/null +++ b/python/ffsim/optimize/stochastic_reconfiguration.py @@ -0,0 +1,228 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Stochastic reconfiguration for optimization of a variational ansatz.""" + +from __future__ import annotations + +import math +from typing import Any, Callable + +import numpy as np +import scipy.linalg +from scipy.optimize import OptimizeResult, minimize +from scipy.sparse.linalg import LinearOperator + +from ffsim.optimize._util import ( + WrappedCallable, + WrappedLinearOperator, + jacobian_finite_diff, + orthogonalize_columns, +) + + +def minimize_stochastic_reconfiguration( + params_to_vec: Callable[[np.ndarray], np.ndarray], + hamiltonian: LinearOperator, + x0: np.ndarray, + *, + maxiter: int = 1000, + variation: float = 1.0, + cond: float | None = None, # TODO document this + epsilon: float = 1e-8, + gtol: float = 1e-5, + optimize_hyperparameters: bool = True, + optimize_hyperparameters_args: dict | None = None, + callback: Callable[[OptimizeResult], Any] | None = None, +) -> OptimizeResult: + """Minimize the energy of a variational ansatz using stochastic reconfiguration. + + References: + + - `Generalized Lanczos algorithm for variational quantum Monte Carlo`_ + + Args: + params_to_vec: Function representing the wavefunction ansatz. It takes as input + a vector of real-valued parameters and outputs the state vector represented + by those parameters. + hamiltonian: The Hamiltonian representing the energy to be minimized. + x0: Initial guess for the parameters. + maxiter: Maximum number of optimization iterations to perform. 
+        variation: Hyperparameter controlling the size of parameter variations
+            used in the linear expansion of the wavefunction. Its value must be
+            strictly positive. A larger value results in larger variations.
+        cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
+            solve for the parameter update.
+        epsilon: Increment to use for approximating the gradient using
+            finite difference.
+        gtol: Convergence threshold for the norm of the projected gradient.
+        optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
+            each iteration. Optimizing the hyperparameter incurs more function and
+            energy evaluations in each iteration, but may speed up convergence, leading
+            to fewer iterations overall. The optimization is performed using
+            `scipy.optimize.minimize`_.
+        optimize_hyperparameters_args: Arguments to use when calling
+            `scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
+            constructed as
+
+            .. code::
+
+                scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args)
+
+            If not specified, the default value of `dict(method="L-BFGS-B")` will be
+            used.
+
+        callback: A callable called after each iteration. It is called with the
+            signature
+
+            .. code::
+
+                callback(intermediate_result: OptimizeResult)
+
+            where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
+            with attributes ``x`` and ``fun``, the present values of the parameter
+            vector and objective function. For all iterations except for the last,
+            it also contains the ``jac`` attribute holding the present value of the
+            gradient, as well as ``overlap_mat`` and ``variation`` attributes
+            holding the present values of the overlap matrix and the `variation`
+            hyperparameter.
+
+    Returns:
+        The optimization result represented as a `scipy.optimize.OptimizeResult`_
+        object. Note the following definitions of selected attributes:
+
+        - ``x``: The final parameters of the optimization.
+        - ``fun``: The energy associated with the final parameters. That is, the
+          expectation value of the Hamiltonian with respect to the state vector
+          corresponding to the parameters.
+        - ``nfev``: The number of times the ``params_to_vec`` function was called.
+        - ``nlinop``: The number of times the ``hamiltonian`` linear operator was
+          applied to a vector.
+
+    .. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
+    .. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
+    .. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
+    .. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
+    """  # noqa: E501
+    if variation <= 0:
+        raise ValueError(f"variation must be positive. Got {variation}.")
+    if maxiter < 1:
+        raise ValueError(f"maxiter must be at least 1.
Got {maxiter}.") + + if optimize_hyperparameters_args is None: + optimize_hyperparameters_args = dict(method="L-BFGS-B") + + variation_param = math.sqrt(variation) + params = x0.copy() + converged = False + intermediate_result = OptimizeResult( + x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0 + ) + + params_to_vec = WrappedCallable(params_to_vec, intermediate_result) + hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result) + + for i in range(maxiter): + vec = params_to_vec(params) + jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon) + jac = orthogonalize_columns(jac, vec) + + energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian) + intermediate_result.njev += 1 + + if i > 0 and callback is not None: + intermediate_result.x = params + intermediate_result.fun = energy + intermediate_result.jac = grad + intermediate_result.overlap_mat = overlap_mat + intermediate_result.variation = variation + callback(intermediate_result) + + if np.linalg.norm(grad) < gtol: + converged = True + break + + if optimize_hyperparameters: + + def f(x: np.ndarray) -> float: + (variation_param,) = x + variation = variation_param**2 + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + vec = params_to_vec(params + param_update) + return np.vdot(vec, hamiltonian @ vec).real + + result = minimize( + f, + x0=[variation_param], + **optimize_hyperparameters_args, + ) + (variation_param,) = result.x + variation = variation_param**2 + + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + params = params + param_update + intermediate_result.nit += 1 + + vec = params_to_vec(params) + energy = np.vdot(vec, hamiltonian @ vec).real + + intermediate_result.x = params + intermediate_result.fun = energy + del intermediate_result.jac + if callback is not None: + callback(intermediate_result) + + if converged: + success = True + message = "Convergence: Norm of projected gradient <= gtol." + else: + success = False + message = "Stop: Total number of iterations reached limit." 
+ + return OptimizeResult( + x=params, + success=success, + message=message, + fun=energy, + jac=grad, + nfev=intermediate_result.nfev, + njev=intermediate_result.njev, + nlinop=intermediate_result.nlinop, + nit=intermediate_result.nit, + ) + + +def _sr_matrices( + vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + energy = np.vdot(vec, hamiltonian @ vec) + grad = vec.conj() @ (hamiltonian @ jac) + overlap_mat = jac.T.conj() @ jac + return energy.real, 2 * grad.real, overlap_mat.real + + +def _get_param_update( + grad: np.ndarray, + overlap_mat: np.ndarray, + variation: float, + cond: float | None, +) -> np.ndarray: + x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond) + return x diff --git a/tests/python/optimize/linear_method_test.py b/tests/python/optimize/linear_method_test.py index cb475f47a..90c5ac48c 100644 --- a/tests/python/optimize/linear_method_test.py +++ b/tests/python/optimize/linear_method_test.py @@ -81,7 +81,7 @@ def callback(intermediate_result): assert result.nit <= 7 assert result.nit < result.nlinop < result.nfev - # optimization with optimizing hyperparameters + # optimization without optimizing hyperparameters info = defaultdict(list) result = ffsim.optimize.minimize_linear_method( params_to_vec, diff --git a/tests/python/optimize/stochastic_reconfiguration_test.py b/tests/python/optimize/stochastic_reconfiguration_test.py new file mode 100644 index 000000000..f5b57c88f --- /dev/null +++ b/tests/python/optimize/stochastic_reconfiguration_test.py @@ -0,0 +1,118 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. 
+ +"""Tests for stochastic reconfiguration optimization algorithm.""" + +from __future__ import annotations + +from collections import defaultdict + +import numpy as np +import pyscf +import pytest + +import ffsim + + +def test_minimize_stochastic_reconfiguration(): + # Build an H2 molecule + mol = pyscf.gto.Mole() + mol.build( + atom=[["H", (0, 0, 0)], ["H", (0, 0, 1.8)]], + basis="sto-6g", + ) + hartree_fock = pyscf.scf.RHF(mol) + hartree_fock.kernel() + + # Initialize parameters + n_reps = 2 + n_params = ffsim.UCJOperator.n_params(hartree_fock.mol.nao_nr(), n_reps) + rng = np.random.default_rng(1804) + x0 = rng.uniform(-10, 10, size=n_params) + + # Get molecular data and molecular Hamiltonian (one- and two-body tensors) + mol_data = ffsim.MolecularData.from_scf(hartree_fock) + norb = mol_data.norb + nelec = mol_data.nelec + mol_hamiltonian = mol_data.hamiltonian + hamiltonian = ffsim.linear_operator(mol_hamiltonian, norb=norb, nelec=nelec) + reference_state = ffsim.hartree_fock_state(norb, nelec) + + def params_to_vec(x: np.ndarray): + operator = ffsim.UCJOperator.from_parameters(x, norb=norb, n_reps=n_reps) + return ffsim.apply_unitary(reference_state, operator, norb=norb, nelec=nelec) + + def energy(x: np.ndarray): + vec = params_to_vec(x) + return np.real(np.vdot(vec, hamiltonian @ vec)) + + info = defaultdict(list) + + def callback(intermediate_result): + info["x"].append(intermediate_result.x) + info["fun"].append(intermediate_result.fun) + np.testing.assert_allclose( + energy(intermediate_result.x), intermediate_result.fun + ) + if hasattr(intermediate_result, "jac"): + info["jac"].append(intermediate_result.jac) + if hasattr(intermediate_result, "variation"): + info["variation"].append(intermediate_result.variation) + + # default optimization + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, callback=callback + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + np.testing.assert_allclose(info["fun"][0], -0.853501, atol=1e-5) + np.testing.assert_allclose(info["fun"][-1], -0.970773, atol=1e-5) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 20 + assert result.nit < result.nlinop < result.nfev + + # optimization without optimizing hyperparameters + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, + x0=x0, + hamiltonian=hamiltonian, + optimize_hyperparameters=False, + callback=callback, + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 30 + assert result.nit < result.nlinop < result.nfev + assert set(info["variation"]) == {1.0} + + # optimization with maxiter + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, hamiltonian=hamiltonian, x0=x0, maxiter=3, callback=callback + ) + assert result.nit == 3 + assert len(info["x"]) == 3 + assert len(info["fun"]) == 3 + assert len(info["jac"]) == 2 + np.testing.assert_allclose(energy(result.x), result.fun) + + # test raising errors + with pytest.raises(ValueError, match="variation"): + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, variation=-0.1 + ) + with pytest.raises(ValueError, 
match="maxiter"): + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, maxiter=0 + )