Skip to content

Commit

Permalink
add hashes and rearrange mult handling (MolSSI#343)
Browse files Browse the repository at this point in the history
* add hashes and rearrange mult handling

* more frag mult testing

* format

* add some float charges tests
  • Loading branch information
loriab authored Sep 18, 2024
1 parent b687499 commit 936407e
Show file tree
Hide file tree
Showing 6 changed files with 322 additions and 113 deletions.
22 changes: 18 additions & 4 deletions qcelemental/models/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,14 +407,28 @@ def _populate_real(cls, v, values, **kwargs):
v = np.array([True for _ in range(n)])
return v

@validator("fragment_charges_", "fragment_multiplicities_")
@validator("fragment_charges_")
def _must_be_n_frag(cls, v, values, **kwargs):
if "fragments_" in values and values["fragments_"] is not None:
n = len(values["fragments_"])
if len(v) != n:
raise ValueError(
"Fragment Charges and Fragment Multiplicities must be same number of entries as Fragments"
)
raise ValueError("Fragment Charges must be same number of entries as Fragments")
return v

@validator("fragment_multiplicities_")
def _must_be_n_frag_mult(cls, v, values, **kwargs):
    """Validate the ``fragment_multiplicities_`` field.

    Checks, in order:

    * when ``fragments_`` is present in ``values`` and is not ``None``, that
      ``v`` has exactly one entry per fragment;
    * that no entry of ``v`` is below 1.0.

    Returns ``v`` unchanged when both checks pass.

    Raises:
        ValueError: on a length mismatch with ``fragments_`` or when any
            multiplicity is less than 1.0.
    """
    fragments = values["fragments_"] if "fragments_" in values else None
    if fragments is not None:
        expected = len(fragments)
        if expected != len(v):
            raise ValueError("Fragment Multiplicities must be same number of entries as Fragments")

    # Evaluate every comparison up front (as a list) before testing, so a
    # non-comparable entry surfaces the same way regardless of its position.
    below_one = [mult < 1.0 for mult in v]
    if any(below_one):
        raise ValueError(f"Fragment Multiplicity must be positive: {v}")
    return v

@validator("molecular_multiplicity")
def _int_if_possible(cls, v, values, **kwargs):
    """Validate the ``molecular_multiplicity`` field.

    Despite the name, this validator performs no integer casting itself; it
    only rejects values below 1.0 and otherwise returns ``v`` unchanged.

    Raises:
        ValueError: when ``v`` is less than 1.0.
    """
    # Phrased as "not (v < 1.0)" so values for which the comparison is False
    # are passed through exactly as with a plain `if v < 1.0: raise`.
    if not (v < 1.0):
        return v
    raise ValueError("Molecular Multiplicity must be positive")

@property
Expand Down
13 changes: 13 additions & 0 deletions qcelemental/molparse/chgmult.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,19 @@ def validate_and_fill_chgmult(
log_brief = verbose >= 2 # TODO: Move back to 1
text = []

def int_if_possible(val):
    """Return ``val`` as an ``int`` when it is a float holding a whole number.

    Any other value (non-float, or a float with a fractional part, or a
    non-finite float) is returned unchanged.
    """
    is_whole_float = isinstance(val, float) and val.is_integer()
    return int(val) if is_whole_float else val

molecular_multiplicity = int_if_possible(molecular_multiplicity)
fragment_multiplicities = [int_if_possible(m) for m in fragment_multiplicities]
if (molecular_multiplicity and molecular_multiplicity < 1.0) or any(m < 1.0 for m in fragment_multiplicities if m):
raise ValidationError(
f"validate_and_fill_chgmult(): Multiplicity must be positive. m: {molecular_multiplicity}, fm: {fragment_multiplicities}"
)

felez = np.split(zeff, fragment_separators)
nfr = len(felez)
if log_full:
Expand Down
13 changes: 8 additions & 5 deletions qcelemental/molparse/from_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,12 +753,15 @@ def validate_and_fill_fragments(nat, fragment_separators=None, fragment_charges=

if fragment_multiplicities is None:
frm = [None] * nfr
elif all(f is None or (isinstance(f, (int, np.integer)) and f >= 1) for f in fragment_multiplicities):
frm = fragment_multiplicities
else:
raise ValidationError(
"""fragment_multiplicities not among None or positive integer: {}""".format(fragment_multiplicities)
)
# positive-ness checks and integer-if-possible casting now deferred to validate_and_fill_chgmult()
# to match molecular_multiplicities handling
try:
    # Each entry is kept as None or coerced to float; anything float() rejects
    # with a TypeError (e.g. a list or dict entry) is reported as invalid input.
    frm = [(f if f is None else float(f)) for f in fragment_multiplicities]
except TypeError as err:
    # Report the offending multiplicities themselves (the message previously
    # formatted fragment_charges by mistake) and keep the original cause.
    raise ValidationError(
        """fragment_multiplicities not among None or float: {}""".format(fragment_multiplicities)
    ) from err

if not (len(frc) == len(frm) == len(frs) + 1):
raise ValidationError(
Expand Down
134 changes: 134 additions & 0 deletions qcelemental/tests/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,16 +367,20 @@ def test_water_orient():

# Make sure the fragments match
assert frag_0.get_hash() == frag_1.get_hash()
assert frag_0.get_hash() == "d0b499739f763e8d3a5556b4ddaeded6a148e4d5"

# Make sure the complexes match
frag_0_1 = mol.get_fragment(0, 1, orient=True, group_fragments=True)
frag_1_0 = mol.get_fragment(1, 0, orient=True, group_fragments=True)
assert frag_0_1.get_hash() == frag_1_0.get_hash()
assert frag_0_1.get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"

# Fragments not reordered, should be different molecules.
frag_0_1 = mol.get_fragment(0, 1, orient=True, group_fragments=False)
frag_1_0 = mol.get_fragment(1, 0, orient=True, group_fragments=False)
assert frag_0_1.get_hash() != frag_1_0.get_hash()
assert frag_0_1.get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"
assert frag_1_0.get_hash() == "9ed8bdc4ae559c20816d65225fdb1ae3c29d149f"

# These are identical molecules, but should be different with ghost
mol = Molecule.from_data(
Expand All @@ -399,6 +403,7 @@ def test_water_orient():
# Make sure the fragments match
assert frag_0.molecular_multiplicity == 1
assert frag_0.get_hash() == frag_1.get_hash()
assert frag_0.get_hash() == "77b272802d61b578b1c65bb87747a89e53e015a7"

# Make sure the complexes match
frag_0_1 = mol.get_fragment(0, 1, orient=True)
Expand All @@ -407,6 +412,8 @@ def test_water_orient():
# Ghost fragments should prevent overlap
assert frag_0_1.molecular_multiplicity == 1
assert frag_0_1.get_hash() != frag_1_0.get_hash()
assert frag_0_1.get_hash() == "4a4cd4d0ab0eef8fed2221fb692c3b1fbf4834de"
assert frag_1_0.get_hash() == "4cc0b30f9f50dd85f4f2036a683865bf17ded803"


def test_molecule_errors_extra():
Expand Down Expand Up @@ -522,10 +529,12 @@ def test_get_fragment(group_fragments, orient):
assert dimers[0].get_hash() != dimers[3].get_hash() # atoms out of order
assert dimers[1].get_hash() != dimers[4].get_hash() # atoms out of order
assert dimers[2].get_hash() == dimers[5].get_hash()
assert dimers[5].get_hash() == "f1d6551f95ce9467dbcce7c48e11bb98d0f1fb98"
elif not group_fragments and not orient:
assert dimers[0].get_hash() == dimers[3].get_hash()
assert dimers[1].get_hash() == dimers[4].get_hash()
assert dimers[2].get_hash() == dimers[5].get_hash()
assert dimers[5].get_hash() == "1bd9100e99748a0c34b01cef558ea5cf4ae6ab85"
else:
assert 0

Expand All @@ -541,11 +550,13 @@ def test_get_fragment(group_fragments, orient):
assert ghdimers[0].get_hash() != ghdimers[3].get_hash() # diff atoms ghosted
assert ghdimers[1].get_hash() != ghdimers[4].get_hash() # diff atoms ghosted
assert ghdimers[2].get_hash() == ghdimers[5].get_hash()
assert ghdimers[5].get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"
elif not group_fragments and not orient:
assert ghdimers[0].get_hash() != ghdimers[3].get_hash() # diff atoms ghosted
assert ghdimers[1].get_hash() != ghdimers[4].get_hash() # diff atoms ghosted
assert ghdimers[2].get_hash() != ghdimers[5].get_hash() # real pattern different
assert not np.allclose(ghdimers[2].real, ghdimers[5].real)
assert ghdimers[5].get_hash() == "9d1fd57e90735a47af4156e1d72b7e8e78fb44eb"
else:
assert 0

Expand All @@ -564,6 +575,7 @@ def test_molecule_repeated_hashing():
)

h1 = mol.get_hash()
assert h1 == "7e604937e8a0c8e4c6426906e25b3002f785b1fc"
assert mol.get_molecular_formula() == "H2O2"

mol2 = Molecule(orient=False, **mol.dict())
Expand Down Expand Up @@ -779,3 +791,125 @@ def test_extras():

mol = qcel.models.Molecule(symbols=["He"], geometry=[0, 0, 0], extras={"foo": "bar"})
assert mol.extras["foo"] == "bar"


_ref_mol_multiplicity_hash = {
"singlet": "b3855c64",
"triplet": "7caca87a",
"disinglet": "83a85546",
"ditriplet": "71d6ba82",
}


@pytest.mark.parametrize(
    "mult_in,mult_store,validate,exp_hash",
    [
        pytest.param(3, 3, False, "triplet"),
        pytest.param(3, 3, True, "triplet"),
        # The 3.1 -> 3 case (validate=False) documents the present bad behavior where a float
        # multiplicity is simply cast to int with no error. This will change soon. With
        # validate=True a nonspecific error is thrown that at least mentions type.
        pytest.param(3.1, 3, False, "triplet"),
        pytest.param(3.0, 3, False, "triplet"),
        pytest.param(3.0, 3, True, "triplet"),
        pytest.param(1, 1, False, "singlet"),
        pytest.param(1, 1, True, "singlet"),
        pytest.param(None, 1, False, "singlet"),
        pytest.param(None, 1, True, "singlet"),
        # fmt: off
        pytest.param(3., 3, False, "triplet"),
        pytest.param(3., 3, True, "triplet"),
        # fmt: on
    ],
)
def test_mol_multiplicity_types(mult_in, mult_store, validate, exp_hash):
    """Check the stored value, stored type, and hash prefix for molecular multiplicity inputs.

    validate=False passes through pydantic validators; validate=True passes through molparse.
    """
    ctor_kwargs = dict(symbols=["He"], geometry=[0, 0, 0], validate=validate)
    if mult_in is not None:
        ctor_kwargs["molecular_multiplicity"] = mult_in

    helium = qcel.models.Molecule(**ctor_kwargs)
    stored = helium.molecular_multiplicity

    assert mult_store == stored
    assert type(mult_store) is type(stored)

    hash_prefix = helium.get_hash()[:8]
    assert hash_prefix == _ref_mol_multiplicity_hash[exp_hash]


@pytest.mark.parametrize(
    "mult_in,validate,error",
    [pytest.param(-3, use_molparse, "Multiplicity must be positive") for use_molparse in (False, True)],
)
def test_mol_multiplicity_types_errors(mult_in, validate, error):
    """A negative molecular multiplicity is rejected on both construction paths."""
    ctor_kwargs = dict(symbols=["He"], geometry=[0, 0, 0], validate=validate)
    if mult_in is not None:
        ctor_kwargs["molecular_multiplicity"] = mult_in

    with pytest.raises((ValueError, qcel.ValidationError)) as excinfo:
        qcel.models.Molecule(**ctor_kwargs)

    message = str(excinfo.value)
    assert error in message


@pytest.mark.parametrize(
    "mol_mult_in,mult_in,mult_store,validate,exp_hash",
    [
        pytest.param(5, [3, 3], [3, 3], False, "ditriplet"),
        pytest.param(5, [3, 3], [3, 3], True, "ditriplet"),
        # The 3.1 -> 3 case (validate=False) documents the present bad behavior where a float
        # multiplicity is simply cast to int with no error. This will change soon. With
        # validate=True an irreconcilable error is thrown.
        pytest.param(5, [3.1, 3.4], [3, 3], False, "ditriplet"),
        # fmt: off
        pytest.param(5, [3.0, 3.], [3, 3], False, "ditriplet"),
        pytest.param(5, [3.0, 3.], [3, 3], True, "ditriplet"),
        # fmt: on
        pytest.param(1, [1, 1], [1, 1], False, "disinglet"),
        pytest.param(1, [1, 1], [1, 1], True, "disinglet"),
        # None in frag_mult not allowed for validate=False
        pytest.param(1, [None, None], [1, 1], True, "disinglet"),
    ],
)
def test_frag_multiplicity_types(mol_mult_in, mult_in, mult_store, validate, exp_hash):
    """Check the stored value, stored type, and hash prefix for fragment multiplicity inputs.

    validate=False passes through pydantic validators; validate=True passes through molparse.
    """
    ctor_kwargs = dict(
        symbols=["He", "Ne"],
        geometry=[0, 0, 0, 2, 0, 0],
        fragments=[[0], [1]],
        validate=validate,
        # The three fields below are passed in so that hashes match between validate=True and
        # validate=False; otherwise validate=False never populates these fields.
        molecular_charge=0,
        fragment_charges=[0, 0],
        molecular_multiplicity=mol_mult_in,
    )
    if mult_in is not None:
        ctor_kwargs["fragment_multiplicities"] = mult_in

    dimer = qcel.models.Molecule(**ctor_kwargs)
    stored = dimer.fragment_multiplicities

    assert mult_store == stored
    assert type(mult_store) is type(stored)

    hash_prefix = dimer.get_hash()[:8]
    assert hash_prefix == _ref_mol_multiplicity_hash[exp_hash]


@pytest.mark.parametrize(
    "mult_in,validate,error",
    [pytest.param([-3, 1], use_molparse, "Multiplicity must be positive") for use_molparse in (False, True)],
)
def test_frag_multiplicity_types_errors(mult_in, validate, error):
    """A negative fragment multiplicity is rejected on both construction paths."""
    ctor_kwargs = dict(
        symbols=["He", "Ne"],
        geometry=[0, 0, 0, 2, 0, 0],
        fragments=[[0], [1]],
        validate=validate,
    )
    if mult_in is not None:
        ctor_kwargs["fragment_multiplicities"] = mult_in

    with pytest.raises((ValueError, qcel.ValidationError)) as excinfo:
        qcel.models.Molecule(**ctor_kwargs)

    message = str(excinfo.value)
    assert error in message
3 changes: 2 additions & 1 deletion qcelemental/tests/test_molparse_from_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,7 +1666,8 @@ def test_badmult_error():
variables=[("bond", "3")],
)

assert "fragment_multiplicities not among None or positive integer" in str(e.value)
# formerly: assert "fragment_multiplicities not among None or positive integer" in str(e.value)
assert "Multiplicity must be positive" in str(e.value)


def test_badchg_error():
Expand Down
Loading

0 comments on commit 936407e

Please sign in to comment.