qiskit-community · kevinsung · Apr 28, 2024 · Apr 27, 2024
diff --git a/python/ffsim/optimize/_util.py b/python/ffsim/optimize/_util.py
@@ -0,0 +1,127 @@
+# (C) Copyright IBM 2024.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+from __future__ import annotations
+
+from typing import Callable
+
+import numpy as np
+from scipy.optimize import OptimizeResult
+from scipy.sparse.linalg import LinearOperator
+
+from ffsim import states
+
+
+class WrappedCallable:
+    """Callable wrapper used to count function calls."""
+
+    def __init__(
+        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
+    ):
+        self.func = func
+        self.optimize_result = optimize_result
+
+    def __call__(self, x: np.ndarray) -> np.ndarray:
+        self.optimize_result.nfev += 1
+        return self.func(x)
+
+
+class WrappedLinearOperator:
+    """LinearOperator wrapper used to count LinearOperator applications."""
+
+    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
+        self.linop = linop
+        self.optimize_result = optimize_result
+
+    def __matmul__(self, other: np.ndarray):
+        if len(other.shape) == 1:
+            self.optimize_result.nlinop += 1
+        else:
+            _, n = other.shape
+            self.optimize_result.nlinop += n
+        return self.linop @ other
+
+    def __rmatmul__(self, other: np.ndarray):
+        if len(other.shape) == 1:
+            self.optimize_result.nlinop += 1
+        else:
+            n, _ = other.shape
+            self.optimize_result.nlinop += n
+        return other @ self.linop
+
+
+def gradient_finite_diff(
+    params_to_vec: Callable[[np.ndarray], np.ndarray],
+    theta: np.ndarray,
+    index: int,
+    epsilon: float,
+) -> np.ndarray:
+    """Return the gradient of one of the components of a function.
+
+    Given a function that maps a vector of "parameters" to an output vector, return
+    the gradient of one of the parameter components.
+
+    Args:
+        params_to_vec: Function that maps a parameter vector to an output vector.
+        theta: The parameters at which to evaluate the gradient.
+        index: The index of the parameter to take the gradient of.
+        epsilon: Finite difference step size.
+
+    Returns:
+        The gradient of the desired parameter component.
+    """
+    unit = states.one_hot(len(theta), index, dtype=float)
+    plus = theta + epsilon * unit
+    minus = theta - epsilon * unit
+    return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon)
+
+
+def jacobian_finite_diff(
+    params_to_vec: Callable[[np.ndarray], np.ndarray],
+    theta: np.ndarray,
+    dim: int,
+    epsilon: float,
+) -> np.ndarray:
+    """Return the Jacobian matrix of a function.
+
+    Given a function that maps a vector of "parameters" to an output vector, return
+    the matrix whose :math:$i$-th column contains the gradient of the
+    :math:$i$-th component of the function.
+
+    Args:
+        params_to_vec: Function that maps a parameter vector to an output vector.
+        theta: The parameters at which to evaluate the Jacobian.
+        dim: The dimension of an output vector of the function.
+        epsilon: Finite difference step size.
+
+    Returns:
+        The Jacobian matrix.
+    """
+    jac = np.zeros((dim, len(theta)), dtype=complex)
+    for i in range(len(theta)):
+        jac[:, i] = gradient_finite_diff(params_to_vec, theta, i, epsilon)
+    return jac
+
+
+def orthogonalize_columns(mat: np.ndarray, vec: np.ndarray) -> np.ndarray:
+    """Orthogonalize the columns of a matrix with respect to a vector.
+
+    Given a matrix and a vector, return a new matrix whose columns contain the
+    components of the old columns orthogonal to the vector.
+
+    Args:
+        mat: The matrix.
+        vec: The vector.
+
+    Returns:
+        The new matrix with columns orthogonal to the vector.
+    """
+    coeffs = vec.T.conj() @ mat
+    return mat - vec.reshape((-1, 1)) * coeffs.reshape((1, -1))
diff --git a/python/ffsim/optimize/linear_method.py b/python/ffsim/optimize/linear_method.py
@@ -20,45 +20,12 @@
 from scipy.optimize import OptimizeResult, minimize
 from scipy.sparse.linalg import LinearOperator
 
-from ffsim.states import one_hot
-
-
-class _WrappedCallable:
-    """Callable wrapper used to count function calls."""
-
-    def __init__(
-        self, func: Callable[[np.ndarray], np.ndarray], optimize_result: OptimizeResult
-    ):
-        self.func = func
-        self.optimize_result = optimize_result
-
-    def __call__(self, x: np.ndarray) -> np.ndarray:
-        self.optimize_result.nfev += 1
-        return self.func(x)
-
-
-class _WrappedLinearOperator:
-    """LinearOperator wrapper used to count LinearOperator applications."""
-
-    def __init__(self, linop: LinearOperator, optimize_result: OptimizeResult):
-        self.linop = linop
-        self.optimize_result = optimize_result
-
-    def __matmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            _, n = other.shape
-            self.optimize_result.nlinop += n
-        return self.linop @ other
-
-    def __rmatmul__(self, other: np.ndarray):
-        if len(other.shape) == 1:
-            self.optimize_result.nlinop += 1
-        else:
-            n, _ = other.shape
-            self.optimize_result.nlinop += n
-        return other @ self.linop
+from ffsim.optimize._util import (
+    WrappedCallable,
+    WrappedLinearOperator,
+    jacobian_finite_diff,
+    orthogonalize_columns,
+)
 
 
 def minimize_linear_method(
@@ -165,12 +132,13 @@ def minimize_linear_method(
         x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
     )
 
-    params_to_vec = _WrappedCallable(params_to_vec, intermediate_result)
-    hamiltonian = _WrappedLinearOperator(hamiltonian, intermediate_result)
+    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
+    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)
 
     for i in range(maxiter):
         vec = params_to_vec(params)
-        jac = _jac(params_to_vec, params, vec, epsilon=epsilon)
+        jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon)
+        jac = orthogonalize_columns(jac, vec)
 
         energy_mat, overlap_mat = _linear_method_matrices(vec, jac, hamiltonian)
         energy = energy_mat[0, 0]
@@ -293,32 +261,6 @@ def _solve_linear_method_eigensystem(
     return eig, vec
 
 
-def _jac(
-    params_to_vec: Callable[[np.ndarray], np.ndarray],
-    theta: np.ndarray,
-    vec: np.ndarray,
-    epsilon: float,
-) -> np.ndarray:
-    jac = np.zeros((len(vec), len(theta)), dtype=complex)
-    for i in range(len(theta)):
-        grad = _grad(params_to_vec, theta, i, epsilon)
-        grad -= np.vdot(vec, grad) * vec
-        jac[:, i] = grad
-    return jac
-
-
-def _grad(
-    params_to_vec: Callable[[np.ndarray], np.ndarray],
-    theta: np.ndarray,
-    index: int,
-    epsilon: float,
-) -> np.ndarray:
-    unit = one_hot(len(theta), index, dtype=float)
-    plus = theta + epsilon * unit
-    minus = theta - epsilon * unit
-    return (params_to_vec(plus) - params_to_vec(minus)) / (2 * epsilon)
-
-
 def _get_param_update(
     energy_mat: np.ndarray,
     overlap_mat: np.ndarray,

diff --git a/tests/python/optimize/linear_method_test.py b/tests/python/optimize/linear_method_test.py
@@ -81,7 +81,7 @@ def callback(intermediate_result):
     assert result.nit <= 7
     assert result.nit < result.nlinop < result.nfev
 
-    # optimization with optimizing hyperparameters
+    # optimization without optimizing hyperparameters
     info = defaultdict(list)
     result = ffsim.optimize.minimize_linear_method(
         params_to_vec,