From 3cb843d4f48654d1734c1edc70a4b51827caca27 Mon Sep 17 00:00:00 2001
From: "Kevin J. Sung" <kevjsung@umich.edu>
Date: Tue, 2 Apr 2024 22:29:38 -0400
Subject: [PATCH] Document Givens decomposition (#122)

* add givens rotation namedtuple

* use givens rotation namedtuple (breaking change)

* document givens decomposition

* python 3.8 doesn't have @=

* remove unnecessary type annotation
---
 python/ffsim/gates/orbital_rotation.py |  8 +--
 python/ffsim/linalg/__init__.py        |  7 +-
 python/ffsim/linalg/givens.py          | 94 ++++++++++++++++++++++++--
 tests/python/linalg/givens_test.py     | 46 ++++++++++---
 4 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/python/ffsim/gates/orbital_rotation.py b/python/ffsim/gates/orbital_rotation.py
index 4eca66fab..60d50f809 100644
--- a/python/ffsim/gates/orbital_rotation.py
+++ b/python/ffsim/gates/orbital_rotation.py
@@ -232,9 +232,9 @@ def _apply_orbital_rotation_givens(
 
     if spin & Spin.ALPHA:
         # transform alpha
-        for (c, s), target_orbs in givens_rotations:
+        for c, s, i, j in givens_rotations:
             _apply_orbital_rotation_adjacent_spin_in_place(
-                vec, c, s.conjugate(), target_orbs, norb, n_alpha
+                vec, c, s.conjugate(), (i, j), norb, n_alpha
             )
         for i, phase_shift in enumerate(phase_shifts):
             indices = _one_subspace_indices(norb, n_alpha, (i,))
@@ -244,9 +244,9 @@ def _apply_orbital_rotation_givens(
         # transform beta
         # transpose vector to align memory layout
         vec = vec.T.copy()
-        for (c, s), target_orbs in givens_rotations:
+        for c, s, i, j in givens_rotations:
             _apply_orbital_rotation_adjacent_spin_in_place(
-                vec, c, s.conjugate(), target_orbs, norb, n_beta
+                vec, c, s.conjugate(), (i, j), norb, n_beta
             )
         for i, phase_shift in enumerate(phase_shifts):
             indices = _one_subspace_indices(norb, n_beta, (i,))
diff --git a/python/ffsim/linalg/__init__.py b/python/ffsim/linalg/__init__.py
index 8611fe503..dc61162bb 100644
--- a/python/ffsim/linalg/__init__.py
+++ b/python/ffsim/linalg/__init__.py
@@ -15,7 +15,11 @@
     double_factorized_t2,
     modified_cholesky,
 )
-from ffsim.linalg.givens import apply_matrix_to_slices, givens_decomposition
+from ffsim.linalg.givens import (
+    GivensRotation,
+    apply_matrix_to_slices,
+    givens_decomposition,
+)
 from ffsim.linalg.linalg import (
     expm_multiply_taylor,
     lup,
@@ -32,6 +36,7 @@
 )
 
 __all__ = [
+    "GivensRotation",
     "apply_matrix_to_slices",
     "double_factorized",
     "double_factorized_t2",
diff --git a/python/ffsim/linalg/givens.py b/python/ffsim/linalg/givens.py
index 2c588bfce..65b5b8717 100644
--- a/python/ffsim/linalg/givens.py
+++ b/python/ffsim/linalg/givens.py
@@ -13,12 +13,35 @@
 from __future__ import annotations
 
 import cmath
+from typing import NamedTuple
 
 import numpy as np
 from scipy.linalg.blas import zrotg as zrotg_
 from scipy.linalg.lapack import zrot
 
 
+class GivensRotation(NamedTuple):
+    r"""A Givens rotation.
+
+    A Givens rotation acts on the two-dimensional subspace spanned by the :math:`i`-th
+    and :math:`j`-th basis vectors as
+
+    .. math::
+
+        \begin{pmatrix}
+            c & s \\
+            -s^* & c \\
+        \end{pmatrix}
+
+    where :math:`c` is a real number and :math:`s` is a complex number.
+    """
+
+    c: float
+    s: complex
+    i: int
+    j: int
+
+
 def apply_matrix_to_slices(
     target: np.ndarray,
     mat: np.ndarray,
@@ -69,12 +92,65 @@ def zrotg(a: complex, b: complex, tol=1e-12) -> tuple[float, complex]:
 
 def givens_decomposition(
     mat: np.ndarray,
-) -> tuple[list[tuple[tuple[float, complex], tuple[int, int]]], np.ndarray]:
-    """Givens rotation decomposition of a unitary matrix."""
+) -> tuple[list[GivensRotation], np.ndarray]:
+    r"""Givens rotation decomposition of a unitary matrix.
+
+    The Givens rotation decomposition of an :math:`n \times n` unitary matrix :math:`U`
+    is given by
+
+    .. math::
+
+        U = D G_L^* G_{L-1}^* \cdots G_1^*
+
+    where :math:`D` is a diagonal matrix and each :math:`G_k` is a Givens rotation.
+    Here, the star :math:`*` denotes the element-wise complex conjugate.
+    A Givens rotation acts on the two-dimensional subspace spanned by the :math:`i`-th
+    and :math:`j`-th basis vectors as
+
+    .. math::
+
+        \begin{pmatrix}
+            c & s \\
+            -s^* & c \\
+        \end{pmatrix}
+
+    where :math:`c` is a real number and :math:`s` is a complex number.
+    Therefore, a Givens rotation is described by a 4-tuple
+    :math:`(c, s, i, j)`, where :math:`c` and :math:`s` are the numbers appearing
+    in the rotation matrix, and :math:`i` and :math:`j` are the
+    indices of the basis vectors of the subspace being rotated.
+    This function always returns Givens rotations with the property that
+    :math:`i` and :math:`j` differ by at most one, that is, either :math:`j = i + 1`
+    or :math:`j = i - 1`.
+
+    The number of Givens rotations :math:`L` is at most :math:`\frac{n (n-1)}{2}`,
+    but it may be less. If we think of Givens rotations acting on disjoint indices
+    as operations that can be performed in parallel, then the entire sequence of
+    rotations can always be performed using at most `n` layers of parallel operations.
+    The decomposition algorithm is described in :ref:`[1] <reference>`.
+
+    .. _reference:
+    
+    [1] William R. Clements et al.
+    `Optimal design for universal multiport interferometers`_.
+
+    Args:
+        mat: The unitary matrix to decompose into Givens rotations.
+
+    Returns:
+        - A list containing the Givens rotations :math:`G_1, \ldots, G_L`.
+          Each Givens rotation is represented as a 4-tuple
+          :math:`(c, s, i, j)`, where :math:`c` and :math:`s` are the numbers appearing
+          in the rotation matrix, and :math:`i` and :math:`j` are the
+          indices of the basis vectors of the subspace being rotated.
+        - A Numpy array containing the diagonal elements of the matrix :math:`D`.
+
+    .. _Optimal design for universal multiport interferometers: https://doi.org/10.1364/OPTICA.3.001460
+    """
     n, _ = mat.shape
     current_matrix = mat.astype(complex, copy=True)
     left_rotations = []
-    right_rotations: list[tuple[tuple[float, complex], tuple[int, int]]] = []
+    right_rotations = []
 
     # compute left and right Givens rotations
     for i in range(n - 1):
@@ -89,7 +165,9 @@ def givens_decomposition(
                         current_matrix[row, target_index + 1],
                         current_matrix[row, target_index],
                     )
-                    right_rotations.append(((c, s), (target_index + 1, target_index)))
+                    right_rotations.append(
+                        GivensRotation(c, s, target_index + 1, target_index)
+                    )
                     current_matrix = current_matrix.T.copy()
                     (
                         current_matrix[target_index + 1],
@@ -112,7 +190,9 @@ def givens_decomposition(
                         current_matrix[target_index - 1, col],
                         current_matrix[target_index, col],
                     )
-                    left_rotations.append(((c, s), (target_index - 1, target_index)))
+                    left_rotations.append(
+                        GivensRotation(c, s, target_index - 1, target_index)
+                    )
                     (
                         current_matrix[target_index - 1],
                         current_matrix[target_index],
@@ -124,9 +204,9 @@ def givens_decomposition(
                     )
 
     # convert left rotations to right rotations
-    for (c, s), (i, j) in reversed(left_rotations):
+    for c, s, i, j in reversed(left_rotations):
         c, s = zrotg(c * current_matrix[j, j], s.conjugate() * current_matrix[i, i])
-        right_rotations.append(((c, -s.conjugate()), (i, j)))
+        right_rotations.append(GivensRotation(c, -s.conjugate(), i, j))
 
         givens_mat = np.array([[c, -s], [s.conjugate(), c]])
         givens_mat[:, 0] *= current_matrix[i, i]
diff --git a/tests/python/linalg/givens_test.py b/tests/python/linalg/givens_test.py
index 00468ac94..304584ddd 100644
--- a/tests/python/linalg/givens_test.py
+++ b/tests/python/linalg/givens_test.py
@@ -15,36 +15,66 @@
 from pathlib import Path
 
 import numpy as np
+import pytest
 from scipy.linalg.lapack import zrot
 
 import ffsim
 from ffsim.linalg import givens_decomposition
 
 
-def test_givens_decomposition():
-    dim = 5
+@pytest.mark.parametrize("dim", range(6))
+def test_givens_decomposition_definition(dim: int):
+    """Test Givens decomposition definition."""
     rng = np.random.default_rng()
-    for _ in range(5):
+    for _ in range(3):
+        mat = ffsim.random.random_unitary(dim, seed=rng)
+        givens_rotations, phase_shifts = givens_decomposition(mat)
+        reconstructed = np.diag(phase_shifts)
+        for c, s, i, j in givens_rotations[::-1]:
+            givens_mat = np.eye(dim, dtype=complex)
+            givens_mat[np.ix_((i, j), (i, j))] = [
+                [c, s],
+                [-s.conjugate(), c],
+            ]
+            # TODO use in-place operator @= after Python 3.9
+            reconstructed = reconstructed @ givens_mat.conj()
+        np.testing.assert_allclose(reconstructed, mat)
+        assert len(givens_rotations) == dim * (dim - 1) // 2
+
+
+@pytest.mark.parametrize("dim", range(6))
+def test_givens_decomposition_reconstruct(dim: int):
+    """Test Givens decomposition reconstruction of original matrix."""
+    rng = np.random.default_rng()
+    for _ in range(3):
         mat = ffsim.random.random_unitary(dim, seed=rng)
         givens_rotations, phase_shifts = givens_decomposition(mat)
         reconstructed = np.eye(dim, dtype=complex)
         for i, phase_shift in enumerate(phase_shifts):
             reconstructed[i] *= phase_shift
-        for (c, s), (i, j) in givens_rotations[::-1]:
+        for c, s, i, j in givens_rotations[::-1]:
             reconstructed = reconstructed.T.copy()
             reconstructed[j], reconstructed[i] = zrot(
                 reconstructed[j], reconstructed[i], c, s.conjugate()
             )
             reconstructed = reconstructed.T
-
         np.testing.assert_allclose(reconstructed, mat)
 
 
-def test_givens_decomposition_no_side_effects():
+@pytest.mark.parametrize("dim", range(6))
+def test_givens_decomposition_identity(dim: int):
+    """Test Givens decomposition on identity matrix."""
+    mat = np.eye(dim)
+    givens_rotations, phase_shifts = givens_decomposition(mat)
+    assert all(phase_shifts == 1)
+    assert len(givens_rotations) == 0
+
+
+@pytest.mark.parametrize("norb", range(6))
+def test_givens_decomposition_no_side_effects(norb: int):
     """Test that the Givens decomposition doesn't modify the original matrix."""
-    norb = 8
     rng = np.random.default_rng()
-    for _ in range(5):
+    for _ in range(3):
         mat = ffsim.random.random_unitary(norb, seed=rng)
         original_mat = mat.copy()
         _ = givens_decomposition(mat)