Skip to content

Commit

Permalink
Support JSON serialization for MolecularData (#258)
Browse files Browse the repository at this point in the history
* add JSON serialization for MolecularData

* add orjson as dependency

* fix frozen bug in mp2

* support compression

* fix frozen bug in ccsd

* fix open shell for mp2 and ccsd

* fix open-shell fci

* use data.get

* support open shell

* fix test assert function

* mypy

* reorder functions

* mypy

* doc
  • Loading branch information
kevinsung authored Jun 27, 2024
1 parent 58aaf3a commit db88eca
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 0 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ classifiers = [
dependencies = [
"numpy",
"opt_einsum",
"orjson",
"pyscf >= 2.4",
"qiskit >= 1.1",
"scipy",
Expand Down
99 changes: 99 additions & 0 deletions python/ffsim/molecular_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@

from __future__ import annotations

import bz2
import dataclasses
import gzip
import lzma
import os
from collections.abc import Iterable
from typing import Callable

import numpy as np
import orjson
import pyscf
import pyscf.cc
import pyscf.mcscf
Expand Down Expand Up @@ -281,3 +287,96 @@ def run_ccsd(
self.ccsd_t1 = ccsd_t1
if store_t2:
self.ccsd_t2 = ccsd_t2

def to_json(
    self, file: str | bytes | os.PathLike, compression: str | None = None
) -> None:
    """Serialize to JSON format, optionally compressed, and save to disk.

    Args:
        file: The file path to save to.
        compression: The optional compression algorithm to use.
            Options: ``"gzip"``, ``"bz2"``, ``"lzma"``.

    Raises:
        ValueError: If ``compression`` is not ``None`` or one of the
            supported options.
    """

    def default(obj):
        # Fallback hook handed to orjson for objects it cannot serialize
        # directly. For NumPy arrays, hand back a C-contiguous version so
        # that orjson can serialize it; anything else is unsupported.
        if isinstance(obj, np.ndarray):
            return np.ascontiguousarray(obj)
        raise TypeError

    open_func: dict[str | None, Callable] = {
        None: open,
        "gzip": gzip.open,
        "bz2": bz2.open,
        "lzma": lzma.open,
    }
    # Validate before touching the file system so that a typo in the
    # compression name yields a clear message and never creates a file.
    if compression not in open_func:
        raise ValueError(
            f"Unsupported compression: {compression!r}. "
            'Options: None, "gzip", "bz2", "lzma".'
        )
    with open_func[compression](file, "wb") as f:
        f.write(
            orjson.dumps(self, option=orjson.OPT_SERIALIZE_NUMPY, default=default)
        )

@staticmethod
def from_json(
    file: str | bytes | os.PathLike, compression: str | None = None
) -> MolecularData:
    """Load a MolecularData from a (possibly compressed) JSON file.

    Args:
        file: The file path to read from.
        compression: The compression algorithm, if any, which was used to
            compress the file.
            Options: ``"gzip"``, ``"bz2"``, ``"lzma"``.

    Returns:
        The MolecularData object.

    Raises:
        ValueError: If ``compression`` is not ``None`` or one of the
            supported options.
    """
    open_func: dict[str | None, Callable] = {
        None: open,
        "gzip": gzip.open,
        "bz2": bz2.open,
        "lzma": lzma.open,
    }
    # Validate before opening the file so that a typo in the compression
    # name yields a clear message rather than a bare KeyError.
    if compression not in open_func:
        raise ValueError(
            f"Unsupported compression: {compression!r}. "
            'Options: None, "gzip", "bz2", "lzma".'
        )
    with open_func[compression](file, "rb") as f:
        data = orjson.loads(f.read())

    def as_array_or_none(val):
        # JSON stores arrays as nested lists; convert back unless absent.
        if val is None:
            return None
        return np.asarray(val)

    def as_array_tuple_or_none(val):
        # Same as above, but for a sequence of arrays converted one by one.
        if val is None:
            return None
        return tuple(np.asarray(arr) for arr in val)

    nelec = tuple(data["nelec"])
    n_alpha, n_beta = nelec
    # When the alpha and beta electron counts differ, the MP2/CCSD amplitudes
    # are loaded as a tuple of arrays instead of a single array.
    # NOTE(review): presumably this mirrors how run_mp2/run_ccsd store
    # open-shell amplitudes -- confirm against those methods.
    arrays_func = as_array_or_none if n_alpha == n_beta else as_array_tuple_or_none

    return MolecularData(
        atom=[
            (element, tuple(coordinates)) for element, coordinates in data["atom"]
        ],
        basis=data["basis"],
        spin=data["spin"],
        symmetry=data["symmetry"],
        norb=data["norb"],
        nelec=nelec,
        mo_coeff=np.asarray(data["mo_coeff"]),
        mo_occ=np.asarray(data["mo_occ"]),
        active_space=data["active_space"],
        core_energy=data["core_energy"],
        one_body_integrals=np.asarray(data["one_body_integrals"]),
        two_body_integrals=np.asarray(data["two_body_integrals"]),
        # The remaining fields are read with .get, so a file that lacks them
        # loads with None in their place.
        hf_energy=data.get("hf_energy"),
        hf_mo_coeff=as_array_or_none(data.get("hf_mo_coeff")),
        hf_mo_occ=as_array_or_none(data.get("hf_mo_occ")),
        mp2_energy=data.get("mp2_energy"),
        mp2_t2=arrays_func(data.get("mp2_t2")),
        ccsd_energy=data.get("ccsd_energy"),
        ccsd_t1=arrays_func(data.get("ccsd_t1")),
        ccsd_t2=arrays_func(data.get("ccsd_t2")),
        fci_energy=data.get("fci_energy"),
        fci_vec=as_array_or_none(data.get("fci_vec")),
        dipole_integrals=as_array_or_none(data.get("dipole_integrals")),
        orbital_symmetries=data.get("orbital_symmetries"),
    )
77 changes: 77 additions & 0 deletions tests/python/molecular_data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,43 @@
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

import dataclasses
import pathlib

import numpy as np
import pyscf
import pyscf.data.elements

import ffsim


def _assert_mol_data_equal(
    actual_mol_data: ffsim.MolecularData, expected_mol_data: ffsim.MolecularData
):
    """Assert that two MolecularData objects agree field by field.

    Fields are dispatched on their annotation, which is compared as a string
    (so the dataclass must have been defined with string annotations).
    Array-valued fields are compared with ``np.testing.assert_array_equal``;
    optional array fields must be absent on both sides or present on both
    sides; every other field is compared with ``==``.

    Args:
        actual_mol_data: The object under test.
        expected_mol_data: The reference object.

    Raises:
        AssertionError: If any field differs.
    """
    for field in dataclasses.fields(actual_mol_data):
        actual = getattr(actual_mol_data, field.name)
        expected = getattr(expected_mol_data, field.name)
        if field.type == "np.ndarray":
            assert isinstance(actual, np.ndarray)
            np.testing.assert_array_equal(actual, expected)
        elif field.type in [
            "np.ndarray | None",
            "np.ndarray | tuple[np.ndarray, np.ndarray] | None",
            "np.ndarray | tuple[np.ndarray, np.ndarray, np.ndarray] | None",
        ]:
            if actual is None:
                # A value dropped during the round trip must be reported,
                # not silently accepted.
                assert expected is None, f"{field.name}: expected a value, got None"
            elif isinstance(actual, tuple):
                assert isinstance(expected, tuple)
                # zip() stops at the shorter input, so compare lengths first.
                assert len(actual) == len(expected)
                for actual_val, expected_val in zip(actual, expected):
                    assert isinstance(actual_val, np.ndarray)
                    assert isinstance(expected_val, np.ndarray)
                    np.testing.assert_array_equal(actual_val, expected_val)
            else:
                assert isinstance(actual, np.ndarray)
                np.testing.assert_array_equal(actual, expected)
        else:
            assert actual == expected


def test_molecular_data_sym():
# Build N2 molecule
mol = pyscf.gto.Mole()
Expand Down Expand Up @@ -88,3 +118,50 @@ def test_molecular_data_run_methods():
np.testing.assert_allclose(mol_data.mp2_energy, -108.58852784026)
np.testing.assert_allclose(mol_data.fci_energy, -108.595987350986)
np.testing.assert_allclose(mol_data.ccsd_energy, -108.5933309085008)


def test_json_closed_shell(tmp_path: pathlib.Path):
    """Round-trip a closed-shell MolecularData through JSON, with and without
    compression, and check that nothing changes."""
    # N2 in a minimal basis, restricted to the orbitals above the frozen core.
    n2 = pyscf.gto.Mole()
    n2.build(
        atom=[("N", (0, 0, 0)), ("N", (1.0, 0, 0))],
        basis="sto-6g",
        symmetry="Dooh",
    )
    first_active = pyscf.data.elements.chemcore(n2)
    orbitals = range(first_active, n2.nao_nr())
    hartree_fock = pyscf.scf.RHF(n2).run()

    # Populate every optional field so that the round trip exercises them all.
    original = ffsim.MolecularData.from_scf(hartree_fock, active_space=orbitals)
    original.run_mp2(store_t2=True)
    original.run_ccsd(store_t1=True, store_t2=True)
    original.run_fci(store_fci_vec=True)

    # The same path is rewritten for each compression setting.
    json_path = tmp_path / "test.json"
    for compression in (None, "gzip", "bz2", "lzma"):
        original.to_json(json_path, compression=compression)
        restored = ffsim.MolecularData.from_json(json_path, compression=compression)
        _assert_mol_data_equal(restored, original)


def test_json_open_shell(tmp_path: pathlib.Path):
    """Round-trip an open-shell MolecularData through JSON, with and without
    compression, and check that nothing changes."""
    # Hydroxyl radical (one unpaired electron) solved with ROHF.
    hydroxyl = pyscf.gto.Mole()
    hydroxyl.build(
        atom=[("H", (0, 0, 0)), ("O", (0, 0, 1.1))],
        basis="6-31g",
        spin=1,
        symmetry="Coov",
    )
    hartree_fock = pyscf.scf.ROHF(hydroxyl).run()

    # Populate every optional field so that the round trip exercises them all.
    original = ffsim.MolecularData.from_scf(hartree_fock)
    original.run_mp2(store_t2=True)
    original.run_ccsd(store_t1=True, store_t2=True)
    original.run_fci(store_fci_vec=True)

    # The same path is rewritten for each compression setting.
    json_path = tmp_path / "test.json"
    for compression in (None, "gzip", "bz2", "lzma"):
        original.to_json(json_path, compression=compression)
        restored = ffsim.MolecularData.from_json(json_path, compression=compression)
        _assert_mol_data_equal(restored, original)

0 comments on commit db88eca

Please sign in to comment.