Skip to content

Commit

Permalink
Optimize conformer comparison algorithm (#765)
Browse files Browse the repository at this point in the history
The previous conformer comparison algorithm required a distance matrix
(d-matrix) for operations. In this update, we introduce a new variable,
`fl_distance`, which represents the distance between the first and last
elements. This method provides a less costly preliminary comparison
before invoking the d-matrix comparison. Now, the d-matrix comparison
will only be executed when necessary, optimizing the overall efficiency
of the algorithm.
  • Loading branch information
alongd authored Sep 2, 2024
2 parents 8ba45be + 693b2f3 commit 9e5c210
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 9 deletions.
30 changes: 23 additions & 7 deletions arc/species/conformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,23 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t
newest_conformers_dict[tor] = list() # Keys are torsions for plotting.
for xyz, energy, dihedral in zip(xyzs, energies, sampling_points):
exists = False
if any([converter.compare_confs(xyz, conf['xyz']) for conf in new_conformers + newest_conformer_list]):
exists = True
dmat1, fl_distance1 = None, None
for conf in new_conformers + newest_conformer_list:
fl_distance1, dmat1, conf, similar = converter.compare_confs_fl(xyz,conf)
if not similar:
break
if converter.compare_confs(xyz, conf['xyz'], skip_conversion=True, dmat1=dmat1, dmat2=conf['dmat']):
exists = True
break
if xyz is not None and energy is not None:
conformer = {'index': len_conformers + len(new_conformers) + len(newest_conformer_list),
'xyz': xyz,
'FF energy': round(energy, 3),
'source': f'Changing dihedrals on most stable conformer, iteration {i}',
'torsion': tor,
'dihedral': round(dihedral, 2)}
'dihedral': round(dihedral, 2),
'dmat': dmat1,
'fl_distance': fl_distance1}
newest_conformers_dict[tor].append(conformer)
if not exists:
newest_conformer_list.append(conformer)
Expand All @@ -541,7 +549,9 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t
'FF energy': None,
'source': f'Changing dihedrals on most stable conformer, iteration {i}, but FF energy is None',
'torsion': tor,
'dihedral': round(dihedral, 2)})
'dihedral': round(dihedral, 2),
'dmat': dmat1,
'fl_distance': fl_distance1})
new_conformers.extend(newest_conformer_list)
if not newest_conformer_list:
newest_conformer_list = [lowest_conf_i]
Expand Down Expand Up @@ -1113,9 +1123,15 @@ def get_lowest_confs(label: str,
for index in range(len(conformer_list)):
if (e is not None and conformer_list[index][energy] > min_e + e) or (n is not None and len(lowest_confs) >= n):
break
if index > 0 and not any([converter.compare_confs(lowest_conf['xyz'], conformer_list[index]['xyz'])
for lowest_conf in lowest_confs]):
lowest_confs.append(conformer_list[index])
if index > 0:
for lowest_conf in lowest_confs:
_, dmat1, lowest_conf, similar = converter.compare_confs_fl(conformer_list[index]['xyz'],lowest_conf)
if not similar or not converter.compare_confs(conformer_list[index]['xyz'], lowest_conf['xyz'],
skip_conversion=True,
dmat1=dmat1,dmat2=lowest_conf['dmat']):
lowest_confs.append(conformer_list[index]) if lowest_conf==lowest_confs[-1] else None
else:
break
return lowest_confs


Expand Down
48 changes: 46 additions & 2 deletions arc/species/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2011,11 +2011,51 @@ def compare_zmats(z1, z2, r_tol=0.01, a_tol=2, d_tol=2, verbose=False, symmetric
symmetric_torsions=symmetric_torsions)


def compare_confs_fl(xyz1: dict,
conf2: dict,
rtol: float = 0.01,
) -> Tuple[float, Optional[np.ndarray], dict, bool]:
"""
Compare two Cartesian coordinates representing conformers using first and last atom distances. If the distances are the same,
the distance matrices are computed and returned.
The relative difference (``rtol`` * fl_distance1) is compared against the absolute difference abs(fl_distance1 - fl_distance2).
Args:
xyz1 (dict): Conformer 1.
conf2 (dict): Conformer 2.
rtol (float): The relative tolerance parameter (see Notes).
Returns:
Tuple containing distances and matrices:
- (fl_distance1, dmat1, conf2, similar): The first and last atom distance of conformer 1, its distance matrix,
conformer 2, and whether the two conformers have almost equal atom distances.
"""
similar = False
conf2['fl_distance'] = conf2.get('fl_distance')
conf2['dmat'] = conf2.get('dmat')
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(conf2['xyz'])
dmat1 = None
fl_distance1 = np.linalg.norm(np.array(xyz1['coords'][0]) - np.array(xyz1['coords'][-1]))
if conf2['fl_distance'] is None:
conf2['fl_distance'] = np.linalg.norm(np.array(xyz2['coords'][0]) - np.array(xyz2['coords'][-1]))
if not np.isclose(fl_distance1, conf2['fl_distance'], rtol=rtol):
return fl_distance1, dmat1, conf2, similar
similar = True
dmat1 = xyz_to_dmat(xyz1)
if conf2['dmat'] is None:
conf2['dmat'] = xyz_to_dmat(xyz2)
return fl_distance1, dmat1, conf2, similar


def compare_confs(xyz1: dict,
xyz2: dict,
rtol: float = 0.01,
atol: float = 0.1,
rmsd_score: bool = False,
skip_conversion: bool = False,
dmat1: Optional[np.ndarray] = None,
dmat2: Optional[np.ndarray] = None,
) -> Union[float, bool]:
"""
Compare two Cartesian coordinates representing conformers using distance matrices.
Expand All @@ -2029,15 +2069,19 @@ def compare_confs(xyz1: dict,
rtol (float): The relative tolerance parameter (see Notes).
atol (float): The absolute tolerance parameter (see Notes).
rmsd_score (bool): Whether to output a root-mean-square deviation score of the two distance matrices.
skip_conversion (bool): Whether to skip converting xyz to distance matrices.
dmat1 (np.ndarray, optional): The distance matrix of conformer 1.
dmat2 (np.ndarray, optional): The distance matrix of conformer 2.
Returns:
Union[float, bool]:
- If ``rmsd_score`` is ``False`` (default): Whether the two conformers have almost equal atom distances.
``True`` if they do.
- If ``rmsd_score`` is ``True``: The RMSD score of two distance matrices.
"""
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2)
dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2)
if not skip_conversion:
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2)
dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2)
if rmsd_score:
# Distance matrix is symmetric, only need the upper triangular part to compute rmsd.
rmsd = calc_rmsd(np.triu(dmat1), np.triu(dmat2))
Expand Down

0 comments on commit 9e5c210

Please sign in to comment.