From eeb80ff919a8fa65375474e6fd9d23fb0efddb7a Mon Sep 17 00:00:00 2001
From: "Kevin J. Sung" <kevjsung@umich.edu>
Date: Tue, 13 Feb 2024 09:18:09 -0500
Subject: [PATCH] MolecularData: factor out independent run methods (#99)

---
 docs/tutorials/04-lucj.ipynb                 |  69 +++++-----
 docs/tutorials/05-entanglement-forging.ipynb |  15 ++-
 python/ffsim/molecular_data.py               | 125 ++++++++++---------
 tests/gates/orbital_rotation_test.py         |   3 +-
 4 files changed, 111 insertions(+), 101 deletions(-)

diff --git a/docs/tutorials/04-lucj.ipynb b/docs/tutorials/04-lucj.ipynb
index 7dbe0dc36..66baacd15 100644
--- a/docs/tutorials/04-lucj.ipynb
+++ b/docs/tutorials/04-lucj.ipynb
@@ -19,7 +19,7 @@
      "output_type": "stream",
      "text": [
       "converged SCF energy = -77.4456267643962\n",
-      "CASCI E = -77.6290254326717  E(CI) = -3.57322412553862  S^2 = 0.0000000\n"
+      "CASCI E = -77.6290254326717  E(CI) = -3.57322412553863  S^2 = 0.0000000\n"
      ]
     }
    ],
@@ -53,12 +53,13 @@
     "active_space = range(mol.nelectron // 2 - 2, mol.nelectron // 2 + 2)\n",
     "\n",
     "# Get molecular data and molecular Hamiltonian (one- and two-body tensors)\n",
-    "mol_data = ffsim.MolecularData.from_scf(\n",
-    "    hartree_fock, active_space=active_space, fci=True\n",
-    ")\n",
+    "mol_data = ffsim.MolecularData.from_scf(hartree_fock, active_space=active_space)\n",
     "norb = mol_data.norb\n",
     "nelec = mol_data.nelec\n",
-    "mol_hamiltonian = mol_data.hamiltonian"
+    "mol_hamiltonian = mol_data.hamiltonian\n",
+    "\n",
+    "# Compute FCI energy\n",
+    "mol_data.run_fci()"
    ]
   },
   {
@@ -96,8 +97,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "E(CCSD) = -77.49387212754468  E_corr = -0.04824536314851346\n",
-      "Energy at initialization: -77.46975600021715\n"
+      "E(CCSD) = -77.49387212754473  E_corr = -0.04824536314851485\n",
+      "Energy at initialization: -77.46975600021644\n"
      ]
     }
    ],
@@ -150,10 +151,10 @@
       "  message: STOP: TOTAL NO. of ITERATIONS REACHED LIMIT\n",
       "  success: False\n",
       "   status: 1\n",
-      "      fun: -77.5096393678589\n",
-      "        x: [-1.486e-01  1.914e-01 ...  6.534e-03 -1.987e+00]\n",
+      "      fun: -77.50964466939116\n",
+      "        x: [-1.479e-01  1.876e-01 ...  3.824e-03  6.462e-01]\n",
       "      nit: 5\n",
-      "      jac: [-1.194e-03 -1.067e-03 ... -5.684e-04 -2.913e-04]\n",
+      "      jac: [ 1.333e-03 -2.134e-03 ... -5.372e-04 -3.055e-04]\n",
       "     nfev: 584\n",
       "     njev: 8\n",
       " hess_inv: <72x72 LbfgsInvHessProduct with dtype=float64>\n"
@@ -216,10 +217,10 @@
       "  message: STOP: TOTAL NO. of ITERATIONS REACHED LIMIT\n",
       "  success: False\n",
       "   status: 1\n",
-      "      fun: -77.45741314558897\n",
-      "        x: [-4.128e-01  8.249e-02 ... -3.067e-02 -1.935e+00]\n",
+      "      fun: -77.45739668895403\n",
+      "        x: [-4.078e-01  8.271e-02 ... -1.694e-02  7.034e-01]\n",
       "      nit: 5\n",
-      "      jac: [ 8.285e-04 -1.137e-05 ...  3.837e-05 -2.345e-04]\n",
+      "      jac: [ 1.815e-03 -2.700e-05 ...  3.553e-05 -8.384e-05]\n",
       "     nfev: 423\n",
       "     njev: 9\n",
       " hess_inv: <46x46 LbfgsInvHessProduct with dtype=float64>\n"
@@ -276,34 +277,34 @@
       "Number of parameters: 46\n",
       " message: Stop: Total number of iterations reached limit.\n",
       " success: False\n",
-      "     fun: -77.49749625463323\n",
-      "       x: [ 1.063e+00 -8.659e-02 ... -1.679e+01  2.643e+01]\n",
+      "     fun: -77.47158320347539\n",
+      "       x: [-6.631e-01  8.976e-02 ... -2.173e-01  9.995e-01]\n",
       "     nit: 5\n",
-      "     jac: [ 9.021e-03  1.711e-03 ...  6.221e-03 -5.828e-03]\n",
-      "    nfev: 856\n",
+      "     jac: [ 7.259e-03 -1.181e-03 ...  3.433e-03 -5.383e-03]\n",
+      "    nfev: 826\n",
       "    njev: 5\n",
-      "  nlinop: 626\n",
+      "  nlinop: 596\n",
       "\n",
       "Iteration 1\n",
-      "    Energy: -77.41517984348127\n",
-      "    Norm of gradient: 0.10920578601840765\n",
-      "    Regularization hyperparameter: 6.687981176805703e-07\n",
-      "    Variation hyperparameter: 0.696485198650709\n",
+      "    Energy: -77.45724086956001\n",
+      "    Norm of gradient: 0.010199505442545696\n",
+      "    Regularization hyperparameter: 0.026890960056319083\n",
+      "    Variation hyperparameter: 0.9931578556353272\n",
       "Iteration 2\n",
-      "    Energy: -77.4387582021023\n",
-      "    Norm of gradient: 0.10615951394818811\n",
-      "    Regularization hyperparameter: 0.037348668091859744\n",
-      "    Variation hyperparameter: 0.6955252427321973\n",
+      "    Energy: -77.4580971045074\n",
+      "    Norm of gradient: 0.0076454977282466455\n",
+      "    Regularization hyperparameter: 0.0006243190909239268\n",
+      "    Variation hyperparameter: 0.9931620654125893\n",
       "Iteration 3\n",
-      "    Energy: -77.46917421145808\n",
-      "    Norm of gradient: 0.058657242826842995\n",
-      "    Regularization hyperparameter: 0.005576706874662407\n",
-      "    Variation hyperparameter: 0.9943325006472397\n",
+      "    Energy: -77.45812347703834\n",
+      "    Norm of gradient: 0.007443152735598327\n",
+      "    Regularization hyperparameter: 1.0505969544705465\n",
+      "    Variation hyperparameter: 0.9931574003267095\n",
       "Iteration 4\n",
-      "    Energy: -77.48627050747123\n",
-      "    Norm of gradient: 0.05746841139489317\n",
-      "    Regularization hyperparameter: 0.015337740819403546\n",
-      "    Variation hyperparameter: 0.9941142453075373\n"
+      "    Energy: -77.46243342363793\n",
+      "    Norm of gradient: 0.02558853103196335\n",
+      "    Regularization hyperparameter: 0.0033642512286005625\n",
+      "    Variation hyperparameter: 0.9991474891911944\n"
      ]
     }
    ],
diff --git a/docs/tutorials/05-entanglement-forging.ipynb b/docs/tutorials/05-entanglement-forging.ipynb
index 959d17297..9cdccae62 100644
--- a/docs/tutorials/05-entanglement-forging.ipynb
+++ b/docs/tutorials/05-entanglement-forging.ipynb
@@ -18,7 +18,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "converged SCF energy = -75.6787887956297\n",
+      "converged SCF energy = -75.6787887956296\n",
       "CASCI E = -75.7288249991515  E(CI) = -23.6332495815006  S^2 = 0.0000000\n"
      ]
     }
@@ -53,10 +53,13 @@
     "active_space = range(1, mol.nao_nr())\n",
     "\n",
     "# Get molecular data and molecular Hamiltonian (one- and two-body tensors)\n",
-    "mol_data = ffsim.MolecularData.from_mole(mol, active_space=active_space, fci=True)\n",
+    "mol_data = ffsim.MolecularData.from_mole(mol, active_space=active_space)\n",
     "norb = mol_data.norb\n",
     "nelec = mol_data.nelec\n",
-    "mol_hamiltonian = mol_data.hamiltonian"
+    "mol_hamiltonian = mol_data.hamiltonian\n",
+    "\n",
+    "# Compute FCI energy\n",
+    "mol_data.run_fci()"
    ]
   },
   {
@@ -68,7 +71,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Energy at initialialization: -74.20656273321596\n"
+      "Energy at initialialization: -74.20656273321593\n"
      ]
     }
    ],
@@ -114,10 +117,10 @@
       "  message: STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT\n",
       "  success: False\n",
       "   status: 1\n",
-      "      fun: -75.68085225170408\n",
+      "      fun: -75.68085348348217\n",
       "        x: [ 2.996e+00 -7.549e-01 ...  2.650e+00  8.012e-01]\n",
       "      nit: 6\n",
-      "      jac: [ 1.756e-03  9.118e-03 ... -1.192e-02  9.521e-04]\n",
+      "      jac: [ 1.771e-03  9.121e-03 ... -1.192e-02  9.578e-04]\n",
       "     nfev: 112\n",
       "     njev: 7\n",
       " hess_inv: <15x15 LbfgsInvHessProduct with dtype=float64>\n"
diff --git a/python/ffsim/molecular_data.py b/python/ffsim/molecular_data.py
index 4cb9ea9a2..482f001f6 100644
--- a/python/ffsim/molecular_data.py
+++ b/python/ffsim/molecular_data.py
@@ -14,7 +14,8 @@
 from collections.abc import Iterable, Sequence
 
 import numpy as np
-from pyscf import ao2mo, cc, gto, mcscf, mp, scf, symm
+import pyscf.scf
+from pyscf import ao2mo, cc, gto, mcscf, mp, symm
 from pyscf.scf.hf import SCF
 
 from ffsim.hamiltonians import MolecularHamiltonian
@@ -53,7 +54,7 @@ def orbital_symmetries(hartree_fock: SCF, orbitals: Sequence[int]) -> list[int]
     return [MOLPRO_ID[hartree_fock.mol.groupname][i] for i in idx]
 
 
-@dataclasses.dataclass(frozen=True)
+@dataclasses.dataclass
 class MolecularData:
     """Class for storing molecular data.
 
@@ -62,10 +63,10 @@ class MolecularData:
         basis: The basis set, e.g. "sto-6g".
         spin: The spin of the molecule.
         symmetry: The symmetry of the molecule.
-        norb: The number of spatial orbitals.
-        nelec: The number of alpha and beta electrons.
         mo_coeff: Hartree-Fock canonical orbital coefficients in the AO basis.
         mo_occ: Hartree-Fock canonical orbital occupancies.
+        norb: The number of spatial orbitals.
+        nelec: The number of alpha and beta electrons.
         active_space: The orbitals included in the active space.
         core_energy: The core energy.
         one_body_tensor: The one-body tensor.
@@ -87,18 +88,18 @@ class MolecularData:
     basis: str
     spin: int
     symmetry: str | None
+    # Hartree-Fock data
+    mo_coeff: np.ndarray
+    mo_occ: np.ndarray
+    hf_energy: float
     # active space information
     norb: int
     nelec: tuple[int, int]
     active_space: list[int]
-    # Hamiltonian coefficients
+    # molecular integrals
     core_energy: float
     one_body_integrals: np.ndarray
     two_body_integrals: np.ndarray
-    # Hartree-Fock data
-    hf_energy: float
-    mo_coeff: np.ndarray
-    mo_occ: np.ndarray
     # MP2 data
     mp2_energy: float | None = None
     mp2_t2: np.ndarray | None = None
@@ -122,23 +123,29 @@ def hamiltonian(self) -> MolecularHamiltonian:
             constant=self.core_energy,
         )
 
+    @property
+    def mole(self) -> gto.Mole:
+        """The pySCF Mole class for this molecular data."""
+        mol = gto.Mole()
+        return mol.build(atom=self.atom, basis=self.basis, symmetry=self.symmetry)
+
+    @property
+    def scf(self) -> gto.Mole:
+        """The pySCF SCF class for this molecular data."""
+        hartree_fock = pyscf.scf.RHF(self.mole)
+        hartree_fock.mo_occ = self.mo_occ
+        hartree_fock.mo_coeff = self.mo_coeff
+        return hartree_fock
+
     @staticmethod
     def from_scf(
-        hartree_fock: SCF,
-        active_space: Iterable[int] | None = None,
-        *,
-        mp2: bool = False,
-        ccsd: bool = False,
-        fci: bool = False,
+        hartree_fock: SCF, active_space: Iterable[int] | None = None
     ) -> "MolecularData":
         """Initialize a MolecularData object from a Hartree-Fock calculation.
 
         Args:
             hartree_fock: The Hartree-Fock object.
             active_space: An optional list of orbitals to use for the active space.
-            mp2: Whether to calculate and store the MP2 energy and t2 amplitudes.
-            ccsd: Whether to calculate and store the CCSD energy, t1, and t2 amplitudes.
-            fci: Whether to calculate and store the FCI energy and state vector.
         """
         if not hartree_fock.e_tot:
             raise ValueError(
@@ -161,31 +168,6 @@ def from_scf(
         one_body_tensor, core_energy = cas.get_h1cas(mo)
         two_body_integrals = cas.get_h2cas(mo)
 
-        # run MP2 if requested
-        frozen = [i for i in range(hartree_fock.mol.nao_nr()) if i not in active_space]
-        mp2_energy = None
-        mp2_t2 = None
-        if mp2:
-            mp2_solver = mp.MP2(hartree_fock, frozen=frozen)
-            mp2_energy, mp2_t2 = mp2_solver.kernel()
-
-        # run CCSD if requested
-        ccsd_energy = None
-        ccsd_t1 = None
-        ccsd_t2 = None
-        if ccsd:
-            ccsd_solver = cc.CCSD(
-                hartree_fock,
-                frozen=frozen,
-            )
-            ccsd_energy, ccsd_t1, ccsd_t2 = ccsd_solver.kernel()
-
-        # run FCI if requested
-        fci_energy = None
-        fci_vec = None
-        if fci:
-            fci_energy, _, fci_vec, _, _ = cas.kernel()
-
         # compute dipole integrals
         charges = hartree_fock.mol.atom_charges()
         coords = hartree_fock.mol.atom_coords()
@@ -213,13 +195,6 @@ def from_scf(
             one_body_integrals=one_body_tensor,
             two_body_integrals=two_body_integrals,
             hf_energy=hf_energy,
-            mp2_energy=mp2_energy,
-            mp2_t2=mp2_t2,
-            ccsd_energy=ccsd_energy,
-            ccsd_t1=ccsd_t1,
-            ccsd_t2=ccsd_t2,
-            fci_energy=fci_energy,
-            fci_vec=fci_vec,
             dipole_integrals=dipole_integrals,
             orbital_symmetries=orbsym,
         )
@@ -228,23 +203,53 @@ def from_scf(
     def from_mole(
         molecule: gto.Mole,
         active_space: Iterable[int] | None = None,
-        mp2: bool = False,
-        ccsd: bool = False,
-        fci: bool = False,
-        scf_func=scf.RHF,
+        scf_func=pyscf.scf.RHF,
     ) -> "MolecularData":
         """Initialize a MolecularData object from a pySCF molecule.
 
         Args:
             molecule: The molecule.
             active_space: An optional list of orbitals to use for the active space.
-            mp2: Whether to calculate and store the MP2 energy and t2 amplitudes.
-            ccsd: Whether to calculate and store the CCSD energy, t1, and t2 amplitudes.
-            fci: Whether to calculate and store the FCI energy and state vector.
             scf_func: The pySCF SCF function to use for the Hartree-Fock calculation.
         """
         hartree_fock = scf_func(molecule)
         hartree_fock.run()
-        return MolecularData.from_scf(
-            hartree_fock, active_space=active_space, mp2=mp2, ccsd=ccsd, fci=fci
-        )
+        return MolecularData.from_scf(hartree_fock, active_space=active_space)
+
+    def run_mp2(self, *, store_t2: bool = False):
+        """Run MP2 and store results."""
+        cas = mcscf.CASCI(self.scf, ncas=self.norb, nelecas=self.nelec)
+        mo = cas.sort_mo(self.active_space, mo_coeff=self.mo_coeff, base=0)
+        frozen = [i for i in range(self.norb) if i not in self.active_space]
+        mp2_solver = mp.MP2(self.scf, frozen=frozen)
+        mp2_energy, mp2_t2 = mp2_solver.kernel(mo_coeff=mo)
+        self.mp2_energy = mp2_energy
+        if store_t2:
+            self.mp2_t2 = mp2_t2
+
+    def run_fci(self, *, store_fci_vec: bool = False) -> None:
+        """Run FCI and store results."""
+        cas = mcscf.CASCI(self.scf, ncas=self.norb, nelecas=self.nelec)
+        mo = cas.sort_mo(self.active_space, mo_coeff=self.mo_coeff, base=0)
+        fci_energy, _, fci_vec, _, _ = cas.kernel(mo_coeff=mo)
+        self.fci_energy = fci_energy
+        if store_fci_vec:
+            self.fci_vec = fci_vec
+
+    def run_ccsd(
+        self,
+        t1: np.ndarray | None = None,
+        t2: np.ndarray | None = None,
+        *,
+        store_t1: bool = False,
+        store_t2: bool = False,
+    ) -> None:
+        """Run CCSD and store results."""
+        frozen = [i for i in range(self.norb) if i not in self.active_space]
+        ccsd_solver = cc.CCSD(self.scf, frozen=frozen)
+        ccsd_energy, ccsd_t1, ccsd_t2 = ccsd_solver.kernel(t1=t1, t2=t2)
+        self.ccsd_energy = ccsd_energy + self.hf_energy
+        if store_t1:
+            self.ccsd_t1 = ccsd_t1
+        if store_t2:
+            self.ccsd_t2 = ccsd_t2
diff --git a/tests/gates/orbital_rotation_test.py b/tests/gates/orbital_rotation_test.py
index 09dcc9665..b20d79871 100644
--- a/tests/gates/orbital_rotation_test.py
+++ b/tests/gates/orbital_rotation_test.py
@@ -274,7 +274,8 @@ def test_apply_orbital_rotation_nitrogen():
         symmetry="d2h",
     )
     active_space = range(2, mol.nao_nr())
-    mol_data = ffsim.MolecularData.from_mole(mol, active_space=active_space, ccsd=True)
+    mol_data = ffsim.MolecularData.from_mole(mol, active_space=active_space)
+    mol_data.run_ccsd(store_t1=True, store_t2=True)
     norb = mol_data.norb
     nelec = mol_data.nelec
     t1 = mol_data.ccsd_t1