From b385e8aade2d60c2668139db24429c9f5234fa89 Mon Sep 17 00:00:00 2001
From: "Kevin J. Sung" <kevjsung@umich.edu>
Date: Sat, 9 Dec 2023 13:36:59 -0500
Subject: [PATCH] Add final orbital rotation to hop gate ansatz (#85)

* add orbital rotation to hop gate ansatz

* use in final orbital rotation for ucj

* use for rest of ucj

* add test

* doc

* mypy

* update notebook
---
 docs/tutorials/05-entanglement-forging.ipynb |  8 +-
 python/ffsim/variational/hopgate.py          | 49 ++++++++++-
 python/ffsim/variational/ucj.py              | 91 +++++---------------
 python/ffsim/variational/util.py             | 74 ++++++++++++++++
 tests/variational/hopgate_test.py            | 56 ++++++++++++
 tests/variational/ucj_test.py                |  2 +
 6 files changed, 207 insertions(+), 73 deletions(-)
 create mode 100644 python/ffsim/variational/util.py
 create mode 100644 tests/variational/hopgate_test.py

diff --git a/docs/tutorials/05-entanglement-forging.ipynb b/docs/tutorials/05-entanglement-forging.ipynb
index c7d4422b4..959d17297 100644
--- a/docs/tutorials/05-entanglement-forging.ipynb
+++ b/docs/tutorials/05-entanglement-forging.ipynb
@@ -87,7 +87,7 @@
     "interaction_pairs = list(brickwork(norb, n_layers))\n",
     "rng = np.random.default_rng(1234)\n",
     "thetas = rng.uniform(-np.pi, np.pi, size=len(interaction_pairs))\n",
-    "operator = ffsim.HopGateAnsatzOperator(interaction_pairs, thetas=thetas)\n",
+    "operator = ffsim.HopGateAnsatzOperator(norb, interaction_pairs, thetas)\n",
     "\n",
     "# Compute energy of ansatz state\n",
     "reference_occupations_spatial = [(0, 1, 2, 3), (1, 2, 3, 4), (0, 1, 2, 4)]\n",
@@ -114,10 +114,10 @@
       "  message: STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT\n",
       "  success: False\n",
       "   status: 1\n",
-      "      fun: -75.68085278176007\n",
+      "      fun: -75.68085225170408\n",
       "        x: [ 2.996e+00 -7.549e-01 ...  2.650e+00  8.012e-01]\n",
       "      nit: 6\n",
-      "      jac: [ 1.759e-03  9.119e-03 ... -1.192e-02  9.436e-04]\n",
+      "      jac: [ 1.756e-03  9.118e-03 ... -1.192e-02  9.521e-04]\n",
       "     nfev: 112\n",
       "     njev: 7\n",
       " hess_inv: <15x15 LbfgsInvHessProduct with dtype=float64>\n"
@@ -130,7 +130,7 @@
     "\n",
     "def fun(x):\n",
     "    # Initialize the ansatz operator from the parameter vector\n",
-    "    operator = ffsim.HopGateAnsatzOperator(interaction_pairs, x)\n",
+    "    operator = ffsim.HopGateAnsatzOperator(norb, interaction_pairs, x)\n",
     "    # Compute energy\n",
     "    energy, _ = ffsim.multireference_state(\n",
     "        mol_hamiltonian, operator, reference_occupations, norb=norb, nelec=nelec\n",
diff --git a/python/ffsim/variational/hopgate.py b/python/ffsim/variational/hopgate.py
index 9830a1db8..a5f012098 100644
--- a/python/ffsim/variational/hopgate.py
+++ b/python/ffsim/variational/hopgate.py
@@ -17,15 +17,21 @@
 
 import numpy as np
 
-from ffsim.gates import apply_hop_gate
+from ffsim.gates import apply_hop_gate, apply_orbital_rotation
+from ffsim.variational.util import (
+    orbital_rotation_from_parameters,
+    orbital_rotation_to_parameters,
+)
 
 
 @dataclass(frozen=True)
 class HopGateAnsatzOperator:
     r"""A hop gate ansatz operator."""
 
+    norb: int
     interaction_pairs: list[tuple[int, int]]
     thetas: np.ndarray
+    final_orbital_rotation: np.ndarray | None = None
 
     def _apply_unitary_(
         self, vec: np.ndarray, norb: int, nelec: tuple[int, int], copy: bool
@@ -39,4 +45,45 @@ def _apply_unitary_(
             vec = apply_hop_gate(
                 vec, theta, target_orbs=target_orbs, norb=norb, nelec=nelec, copy=False
             )
+        if self.final_orbital_rotation is not None:
+            vec = apply_orbital_rotation(
+                vec,
+                mat=self.final_orbital_rotation,
+                norb=norb,
+                nelec=nelec,
+                copy=False,
+            )
         return vec
+
+    def to_parameters(self) -> np.ndarray:
+        """Convert the hop gate ansatz operator to a real-valued parameter vector."""
+        num_params = len(self.thetas)
+        if self.final_orbital_rotation is not None:
+            num_params += self.norb**2
+        params = np.zeros(num_params)
+        params[: len(self.thetas)] = self.thetas
+        if self.final_orbital_rotation is not None:
+            params[len(self.thetas) :] = orbital_rotation_to_parameters(
+                self.final_orbital_rotation
+            )
+        return params
+
+    @staticmethod
+    def from_parameters(
+        params: np.ndarray,
+        norb: int,
+        interaction_pairs: list[tuple[int, int]],
+        with_final_orbital_rotation: bool = False,
+    ) -> HopGateAnsatzOperator:
+        final_orbital_rotation = None
+        if with_final_orbital_rotation:
+            final_orbital_rotation = orbital_rotation_from_parameters(
+                params[-(norb**2) :], norb
+            )
+            params = params[: -(norb**2)]
+        return HopGateAnsatzOperator(
+            norb=norb,
+            interaction_pairs=interaction_pairs,
+            thetas=params,
+            final_orbital_rotation=final_orbital_rotation,
+        )
diff --git a/python/ffsim/variational/ucj.py b/python/ffsim/variational/ucj.py
index 52bda2514..db37d9871 100644
--- a/python/ffsim/variational/ucj.py
+++ b/python/ffsim/variational/ucj.py
@@ -22,6 +22,10 @@
 
 from ffsim.gates import apply_diag_coulomb_evolution, apply_orbital_rotation
 from ffsim.linalg import double_factorized_t2
+from ffsim.variational.util import (
+    orbital_rotation_from_parameters,
+    orbital_rotation_to_parameters,
+)
 
 
 @dataclass(frozen=True)
@@ -84,7 +88,7 @@ def from_parameters(
         alpha_alpha_indices: list[tuple[int, int]] | None = None,
         alpha_beta_indices: list[tuple[int, int]] | None = None,
         with_final_orbital_rotation: bool = False,
-    ) -> "UCJOperator":
+    ) -> UCJOperator:
         """Initialize the UCJ operator from a real-valued parameter vector."""
         return UCJOperator(
             *_ucj_from_parameters(
@@ -120,7 +124,7 @@ def from_t_amplitudes(
         t1_amplitudes: np.ndarray | None = None,
         n_reps: int | None = None,
         tol: float = 1e-8,
-    ) -> "UCJOperator":
+    ) -> UCJOperator:
         """Initialize the UCJ operator from t2 (and optionally t1) amplitudes."""
         # TODO maybe allow specifying alpha-alpha and alpha-beta indices
         nocc, _, nvrt, _ = t2_amplitudes.shape
@@ -417,14 +421,13 @@ def _ucj_from_parameters(
         List[Tuple[int, int]],
         list(itertools.combinations_with_replacement(range(norb), 2)),
     )
-    triu_indices_no_diag = list(itertools.combinations(range(norb), 2))
     if alpha_alpha_indices is None:
         alpha_alpha_indices = triu_indices
     if alpha_beta_indices is None:
         alpha_beta_indices = triu_indices
     diag_coulomb_mats_alpha_alpha = np.zeros((n_reps, norb, norb))
     diag_coulomb_mats_alpha_beta = np.zeros((n_reps, norb, norb))
-    orbital_rotation_generators = np.zeros((n_reps, norb, norb), dtype=complex)
+    orbital_rotations = np.zeros((n_reps, norb, norb), dtype=complex)
     index = 0
     # diag coulomb matrices, alpha-alpha
     indices = alpha_alpha_indices
@@ -446,49 +449,16 @@ def _ucj_from_parameters(
             mat[rows, cols] = vals
             mat[cols, rows] = vals
             index += n_params
-    # orbital rotations, imaginary part
-    indices = triu_indices
-    n_params = len(indices)
-    rows, cols = zip(*indices)
-    for mat in orbital_rotation_generators:
-        vals = 1j * params[index : index + n_params]
-        mat[rows, cols] = vals
-        mat[cols, rows] = vals
-        index += n_params
-    # orbital rotations, real part
-    indices = triu_indices_no_diag
-    n_params = len(indices)
-    rows, cols = zip(*indices)
-    for mat in orbital_rotation_generators:
-        vals = params[index : index + n_params]
-        mat[rows, cols] += vals
-        mat[cols, rows] -= vals
-        index += n_params
-    # exponentiate orbital rotation generators
-    orbital_rotations = np.stack(
-        [scipy.linalg.expm(mat) for mat in orbital_rotation_generators]
-    )
+    # orbital rotations
+    for mat in orbital_rotations:
+        mat[:] = orbital_rotation_from_parameters(
+            params[index : index + norb**2], norb
+        )
+        index += norb**2
     # final orbital rotation
     final_orbital_rotation = None
     if with_final_orbital_rotation:
-        final_orbital_rotation_generator = np.zeros((norb, norb), dtype=complex)
-        # final orbital rotation, imaginary part
-        indices = triu_indices
-        n_params = len(indices)
-        rows, cols = zip(*indices)
-        vals = 1j * params[index : index + n_params]
-        final_orbital_rotation_generator[rows, cols] = vals
-        final_orbital_rotation_generator[cols, rows] = vals
-        index += n_params
-        # final orbital rotation, real part
-        indices = triu_indices_no_diag
-        n_params = len(indices)
-        rows, cols = zip(*indices)
-        vals = params[index : index + n_params]
-        final_orbital_rotation_generator[rows, cols] += vals
-        final_orbital_rotation_generator[cols, rows] -= vals
-        # exponentiate final orbital rotation generator
-        final_orbital_rotation = scipy.linalg.expm(final_orbital_rotation_generator)
+        final_orbital_rotation = orbital_rotation_from_parameters(params[index:], norb)
     return (
         diag_coulomb_mats_alpha_alpha,
         diag_coulomb_mats_alpha_beta,
@@ -511,7 +481,6 @@ def _ucj_to_parameters(
         List[Tuple[int, int]],
         list(itertools.combinations_with_replacement(range(norb), 2)),
     )
-    triu_indices_no_diag = list(itertools.combinations(range(norb), 2))
     if alpha_alpha_indices is None:
         alpha_alpha_indices = triu_indices
     if alpha_beta_indices is None:
@@ -519,7 +488,6 @@ def _ucj_to_parameters(
     ntheta = n_reps * (len(alpha_alpha_indices) + len(alpha_beta_indices) + norb**2)
     if final_orbital_rotation is not None:
         ntheta += norb**2
-    orbital_rotation_generators = [scipy.linalg.logm(mat) for mat in orbital_rotations]
     theta = np.zeros(ntheta)
     index = 0
     # diag coulomb matrices, alpha-alpha
@@ -536,28 +504,15 @@ def _ucj_to_parameters(
         for mat in diag_coulomb_mats_alpha_beta:
             theta[index : index + n_params] = mat[tuple(zip(*indices))]
             index += n_params
-    # orbital rotations, imaginary part
-    indices = triu_indices
-    n_params = len(indices)
-    for mat in orbital_rotation_generators:
-        theta[index : index + n_params] = mat[tuple(zip(*indices))].imag
-        index += n_params
-    # orbital rotations, real part
-    indices = triu_indices_no_diag
-    n_params = len(indices)
-    for mat in orbital_rotation_generators:
-        theta[index : index + n_params] = mat[tuple(zip(*indices))].real
-        index += n_params
+    # orbital rotations
+    for orbital_rotation in orbital_rotations:
+        theta[index : index + norb**2] = orbital_rotation_to_parameters(
+            orbital_rotation
+        )
+        index += norb**2
     # final orbital rotation
     if final_orbital_rotation is not None:
-        mat = scipy.linalg.logm(final_orbital_rotation)
-        # final orbital rotation, imaginary part
-        indices = triu_indices
-        n_params = len(indices)
-        theta[index : index + n_params] = mat[tuple(zip(*indices))].imag
-        index += n_params
-        # final orbital rotation, real part
-        indices = triu_indices_no_diag
-        n_params = len(indices)
-        theta[index : index + n_params] = mat[tuple(zip(*indices))].real
+        theta[index : index + norb**2] = orbital_rotation_to_parameters(
+            final_orbital_rotation
+        )
     return theta
diff --git a/python/ffsim/variational/util.py b/python/ffsim/variational/util.py
new file mode 100644
index 000000000..3c017a995
--- /dev/null
+++ b/python/ffsim/variational/util.py
@@ -0,0 +1,74 @@
+# (C) Copyright IBM 2023.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Variational ansatz utilities."""
+
+from __future__ import annotations
+
+import itertools
+
+import numpy as np
+import scipy.linalg
+
+
+def orbital_rotation_to_parameters(orbital_rotation: np.ndarray) -> np.ndarray:
+    """Convert an orbital rotation to parameters.
+
+    Converts an orbital rotation to a real-valued parameter vector. The parameter vector
+    contains non-redundant real and imaginary parts of the elements of the matrix
+    logarithm of the orbital rotation matrix.
+
+    Args:
+        orbital_rotation: The orbital rotation.
+
+    Returns:
+        The list of real numbers parameterizing the orbital rotation.
+    """
+    norb, _ = orbital_rotation.shape
+    triu_indices = list(itertools.combinations_with_replacement(range(norb), 2))
+    triu_indices_no_diag = list(itertools.combinations(range(norb), 2))
+    mat = scipy.linalg.logm(orbital_rotation)
+    params = np.zeros(norb**2)
+    # real part
+    params[: len(triu_indices_no_diag)] = mat[tuple(zip(*triu_indices_no_diag))].real
+    # imaginary part
+    params[len(triu_indices_no_diag) :] = mat[tuple(zip(*triu_indices))].imag
+    return params
+
+
+def orbital_rotation_from_parameters(params: np.ndarray, norb: int) -> np.ndarray:
+    """Construct an orbital rotation from parameters.
+
+    Converts a real-valued parameter vector to an orbital rotation. The parameter vector
+    contains non-redundant real and imaginary parts of the elements of the matrix
+    logarithm of the orbital rotation matrix.
+
+    Args:
+        params: The real-valued parameters.
+        norb: The number of spatial orbitals, which gives the width and height of the
+            orbital rotation matrix.
+
+    Returns:
+        The orbital rotation.
+    """
+    triu_indices = list(itertools.combinations_with_replacement(range(norb), 2))
+    triu_indices_no_diag = list(itertools.combinations(range(norb), 2))
+    generator = np.zeros((norb, norb), dtype=complex)
+    # imaginary part
+    vals = 1j * params[len(triu_indices_no_diag) :]
+    rows, cols = zip(*triu_indices)
+    generator[rows, cols] = vals
+    generator[cols, rows] = vals
+    # real part
+    vals = params[: len(triu_indices_no_diag)]
+    rows, cols = zip(*triu_indices_no_diag)
+    generator[rows, cols] += vals
+    generator[cols, rows] -= vals
+    return scipy.linalg.expm(generator)
diff --git a/tests/variational/hopgate_test.py b/tests/variational/hopgate_test.py
new file mode 100644
index 000000000..9c7440519
--- /dev/null
+++ b/tests/variational/hopgate_test.py
@@ -0,0 +1,56 @@
+# (C) Copyright IBM 2023.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Tests for hop gate ansatz."""
+
+import itertools
+
+import numpy as np
+from scipy.special import comb
+
+import ffsim
+
+
+def test_parameters_roundtrip():
+    norb = 5
+    n_reps = 2
+    rng = np.random.default_rng()
+
+    def ncycles(iterable, n):
+        "Returns the sequence elements n times"
+        return itertools.chain.from_iterable(itertools.repeat(tuple(iterable), n))
+
+    pairs = itertools.combinations(range(norb), 2)
+    interaction_pairs = list(ncycles(pairs, n_reps))
+    thetas = rng.uniform(-np.pi, np.pi, size=len(interaction_pairs))
+    final_orbital_rotation = ffsim.random.random_unitary(norb)
+
+    operator = ffsim.HopGateAnsatzOperator(
+        norb=norb,
+        interaction_pairs=interaction_pairs,
+        thetas=thetas,
+        final_orbital_rotation=final_orbital_rotation,
+    )
+    assert len(operator.to_parameters()) == n_reps * comb(norb, 2) + norb**2
+    roundtripped = ffsim.HopGateAnsatzOperator.from_parameters(
+        operator.to_parameters(),
+        norb=norb,
+        interaction_pairs=interaction_pairs,
+        with_final_orbital_rotation=True,
+    )
+
+    np.testing.assert_allclose(
+        roundtripped.thetas,
+        operator.thetas,
+    )
+    np.testing.assert_allclose(
+        roundtripped.final_orbital_rotation,
+        operator.final_orbital_rotation,
+    )
diff --git a/tests/variational/ucj_test.py b/tests/variational/ucj_test.py
index f279db289..681e669b9 100644
--- a/tests/variational/ucj_test.py
+++ b/tests/variational/ucj_test.py
@@ -8,6 +8,8 @@
 # copyright notice, and modified files need to carry a notice indicating
 # that they have been altered from the originals.
 
+"""Tests for unitary cluster Jastrow ansatz."""
+
 import numpy as np
 import pyscf
 import scipy.linalg