Skip to content

Commit

Permalink
add MolecularData.from_fcidump and deprecate MolecularHamiltonian.fci…
Browse files Browse the repository at this point in the history
…dump (#264)

* add MolecularData.from_fcidump and deprecate MolecularHamiltonian.fcidump

* in run methods, raise error if data is missing
  • Loading branch information
kevinsung authored Jul 2, 2024
1 parent cba0a0a commit 5e10811
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 38 deletions.
6 changes: 6 additions & 0 deletions python/ffsim/hamiltonians/molecular_hamiltonian.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from opt_einsum import contract
from pyscf.fci.direct_nosym import absorb_h1e, contract_1e, contract_2e, make_hdiag
from scipy.sparse.linalg import LinearOperator
from typing_extensions import deprecated

from ffsim._lib import FermionOperator
from ffsim.cistring import gen_linkstr_index
Expand Down Expand Up @@ -54,6 +55,11 @@ class MolecularHamiltonian:
constant: float = 0.0

@staticmethod
@deprecated(
"The MolecularHamiltonian.from_fcidump method is deprecated. "
"Instead, use MolecularData.from_fcidump and then access the `hamiltonian` "
"attribute of the returned MolecularData."
)
def from_fcidump(file: str | bytes | os.PathLike) -> MolecularHamiltonian:
"""Initialize a MolecularHamiltonian from an FCIDUMP file.
Expand Down
116 changes: 78 additions & 38 deletions python/ffsim/molecular_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import pyscf.mcscf
import pyscf.mp
import pyscf.symm
import pyscf.tools
from typing_extensions import deprecated

from ffsim.hamiltonians import MolecularHamiltonian
Expand All @@ -37,19 +38,20 @@ class MolecularData:
"""Class for storing molecular data.
Attributes:
atom (list[tuple[str, tuple[float, float, float]]]): The coordinates of the
atoms in the molecule.
basis (str): The basis set, e.g. "sto-6g".
spin (int): The spin of the molecule.
symmetry (str | None): The symmetry of the molecule.
norb (int): The number of spatial orbitals.
nelec (tuple[int, int]): The number of alpha and beta electrons.
mo_coeff (np.ndarray): Molecular orbital coefficients in the AO basis.
mo_occ (np.ndarray): Molecular orbital occupancies.
active_space (list[int]): The molecular orbitals included in the active space.
core_energy (float): The core energy.
one_body_integrals (np.ndarray): The one-body integrals.
two_body_integrals (np.ndarray): The two-body integrals in compressed format.
norb (int): The number of spatial orbitals.
nelec (tuple[int, int]): The number of alpha and beta electrons.
atom (list[tuple[str, tuple[float, float, float]]] | None): The coordinates of
the atoms in the molecule.
basis (str | None): The basis set, e.g. "sto-6g".
spin (int | None): The spin of the molecule.
symmetry (str | None): The symmetry of the molecule.
mo_coeff (np.ndarray | None): Molecular orbital coefficients in the AO basis.
mo_occ (np.ndarray | None): Molecular orbital occupancies.
active_space (list[int] | None): The molecular orbitals included in the active
space.
hf_energy (float | None): The Hartree-Fock energy.
hf_mo_coeff (np.ndarray | None): Hartree-Fock canonical orbital coefficients in
the AO basis.
Expand All @@ -68,21 +70,22 @@ class MolecularData:
orbital_symmetries (list[str] | None): The orbital symmetries.
"""

# molecule information corresponding to attributes of pyscf.gto.Mole
atom: list[tuple[str, tuple[float, float, float]]]
basis: str
spin: int
symmetry: str | None
# active space information
norb: int
nelec: tuple[int, int]
mo_coeff: np.ndarray
mo_occ: np.ndarray
active_space: list[int]
# molecular integrals
# Molecular integrals
core_energy: float
one_body_integrals: np.ndarray
two_body_integrals: np.ndarray
# Number of orbitals and numbers of alpha and beta electrons
norb: int
nelec: tuple[int, int]
# Molecule information corresponding to attributes of pyscf.gto.Mole
atom: list[tuple[str, tuple[float, float, float]]] | None = None
basis: str | None = None
spin: int | None = None
symmetry: str | None = None
# active space information
mo_coeff: np.ndarray | None = None
mo_occ: np.ndarray | None = None
active_space: list[int] | None = None
# Hartree-Fock data
hf_energy: float | None = None
hf_mo_coeff: np.ndarray | None = None
Expand Down Expand Up @@ -121,6 +124,27 @@ def mole(self) -> pyscf.gto.Mole:
symmetry=self.symmetry,
)

@staticmethod
def from_fcidump(file: str | bytes | os.PathLike) -> MolecularData:
"""Initialize a MolecularData from an FCIDUMP file.
Args:
file: The FCIDUMP file path.
"""
data = pyscf.tools.fcidump.read(file, verbose=False)
n_electrons = data["NELEC"]
spin = data["MS2"]
n_alpha = (n_electrons + spin) // 2
n_beta = (n_electrons - spin) // 2
return MolecularData(
core_energy=data["ECORE"],
one_body_integrals=data["H1"],
two_body_integrals=data["H2"],
norb=data["NORB"],
nelec=(n_alpha, n_beta),
spin=spin,
)

@staticmethod
def from_scf(
hartree_fock: pyscf.scf.hf.SCF, active_space: Iterable[int] | None = None
Expand Down Expand Up @@ -175,18 +199,18 @@ def from_scf(
)

return MolecularData(
core_energy=core_energy,
one_body_integrals=one_body_tensor,
two_body_integrals=two_body_integrals,
norb=norb,
nelec=(n_alpha, n_beta),
atom=mol.atom,
basis=mol.basis,
spin=mol.spin,
symmetry=mol.symmetry or None,
norb=norb,
nelec=(n_alpha, n_beta),
mo_coeff=hartree_fock.mo_coeff,
mo_occ=hartree_fock.mo_occ,
active_space=active_space,
core_energy=core_energy,
one_body_integrals=one_body_tensor,
two_body_integrals=two_body_integrals,
hf_energy=hf_energy,
dipole_integrals=dipole_integrals,
orbital_symmetries=orbsym,
Expand Down Expand Up @@ -215,6 +239,11 @@ def from_mole(

def run_fci(self, *, store_fci_vec: bool = False) -> None:
"""Run FCI and store results."""
if self.mo_coeff is None or self.mo_occ is None or self.active_space is None:
raise RuntimeError(
"The run_fci method requires the mo_coeff, mo_occ, and active_space "
"attributes to be set to non-None values."
)
n_alpha, n_beta = self.nelec
scf_func = pyscf.scf.RHF if n_alpha == n_beta else pyscf.scf.ROHF
scf = scf_func(self.mole)
Expand All @@ -229,6 +258,11 @@ def run_fci(self, *, store_fci_vec: bool = False) -> None:

def run_mp2(self, *, store_t2: bool = False):
"""Run MP2 and store results."""
if self.mo_coeff is None or self.mo_occ is None or self.active_space is None:
raise RuntimeError(
"The run_mp2 method requires the mo_coeff, mo_occ, and active_space "
"attributes to be set to non-None values."
)
n_alpha, n_beta = self.nelec
scf_func = pyscf.scf.RHF if n_alpha == n_beta else pyscf.scf.ROHF
scf = scf_func(self.mole)
Expand Down Expand Up @@ -266,6 +300,11 @@ def run_ccsd(
store_t2: bool = False,
) -> None:
"""Run CCSD and store results."""
if self.mo_coeff is None or self.mo_occ is None or self.active_space is None:
raise RuntimeError(
"The run_ccsd method requires the mo_coeff, mo_occ, and active_space "
"attributes to be set to non-None values."
)
n_alpha, n_beta = self.nelec
scf_func = pyscf.scf.RHF if n_alpha == n_beta else pyscf.scf.ROHF
scf = scf_func(self.mole)
Expand Down Expand Up @@ -356,22 +395,23 @@ def as_array_tuple_or_none(val):
nelec = tuple(data["nelec"])
n_alpha, n_beta = nelec
arrays_func = as_array_or_none if n_alpha == n_beta else as_array_tuple_or_none
atom = data.get("atom")
if atom is not None:
atom = [(element, tuple(coordinates)) for element, coordinates in atom]

return MolecularData(
atom=[
(element, tuple(coordinates)) for element, coordinates in data["atom"]
],
basis=data["basis"],
spin=data["spin"],
symmetry=data["symmetry"],
norb=data["norb"],
nelec=nelec,
mo_coeff=np.asarray(data["mo_coeff"]),
mo_occ=np.asarray(data["mo_occ"]),
active_space=data["active_space"],
core_energy=data["core_energy"],
one_body_integrals=np.asarray(data["one_body_integrals"]),
two_body_integrals=np.asarray(data["two_body_integrals"]),
norb=data["norb"],
nelec=nelec,
atom=atom,
basis=data.get("basis"),
spin=data.get("spin"),
symmetry=data.get("symmetry"),
mo_coeff=as_array_or_none(data.get("mo_coeff")),
mo_occ=as_array_or_none(data.get("mo_occ")),
active_space=data.get("active_space"),
hf_energy=data.get("hf_energy"),
hf_mo_coeff=as_array_or_none(data.get("hf_mo_coeff")),
hf_mo_occ=as_array_or_none(data.get("hf_mo_occ")),
Expand Down
1 change: 1 addition & 0 deletions tests/python/hamiltonians/molecular_hamiltonian_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_rotated():
np.testing.assert_allclose(original_expectation, rotated_expectation)


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_from_fcidump(tmp_path: pathlib.Path):
"""Test loading from FCIDUMP."""
mol = pyscf.gto.Mole()
Expand Down
51 changes: 51 additions & 0 deletions tests/python/molecular_data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,54 @@ def test_json_open_shell(tmp_path: pathlib.Path):
tmp_path / "test.json", compression=compression
)
_assert_mol_data_equal(loaded_mol_data, mol_data)


def test_from_fcidump(tmp_path: pathlib.Path):
"""Test loading from FCIDUMP."""
mol = pyscf.gto.Mole()
mol.build(
atom=[["N", (0, 0, 0)], ["N", (1.0, 0, 0)]],
basis="sto-6g",
symmetry="Dooh",
spin=0,
)
n_frozen = pyscf.data.elements.chemcore(mol)
active_space = range(n_frozen, mol.nao_nr())
scf = pyscf.scf.RHF(mol).run()
mol_data = ffsim.MolecularData.from_scf(scf, active_space=active_space)
pyscf.tools.fcidump.from_integrals(
tmp_path / "test.fcidump",
h1e=mol_data.one_body_integrals,
h2e=mol_data.two_body_integrals,
nuc=mol_data.core_energy,
nmo=mol_data.norb,
nelec=mol_data.nelec,
)
loaded_mol_data = ffsim.MolecularData.from_fcidump(tmp_path / "test.fcidump")
assert loaded_mol_data.norb == mol_data.norb
assert loaded_mol_data.nelec == mol_data.nelec
assert loaded_mol_data.spin == mol_data.spin
assert ffsim.approx_eq(loaded_mol_data.hamiltonian, mol_data.hamiltonian)

mol = pyscf.gto.Mole()
mol.build(
atom=[["H", (0, 0, 0)], ["O", (0, 0, 1.1)]],
basis="6-31g",
spin=1,
symmetry="Coov",
)
scf = pyscf.scf.ROHF(mol).run()
mol_data = ffsim.MolecularData.from_scf(scf, active_space=active_space)
pyscf.tools.fcidump.from_integrals(
tmp_path / "test.fcidump",
h1e=mol_data.one_body_integrals,
h2e=mol_data.two_body_integrals,
nuc=mol_data.core_energy,
nmo=mol_data.norb,
nelec=mol_data.nelec,
)
loaded_mol_data = ffsim.MolecularData.from_fcidump(tmp_path / "test.fcidump")
assert loaded_mol_data.norb == mol_data.norb
assert loaded_mol_data.nelec == mol_data.nelec
assert loaded_mol_data.spin == mol_data.spin
assert ffsim.approx_eq(loaded_mol_data.hamiltonian, mol_data.hamiltonian)

0 comments on commit 5e10811

Please sign in to comment.