Skip to content

Commit

Permalink
Merge PR #2698 branch 'drawbidentate'.
Browse files Browse the repository at this point in the history
Adsorbates with more than one surface site were drawn weirdly,
with the second or third surface site often floating up in the air.
With this change, it temporarily forms a bond between the surface sites,
so the layout algorithm sees them as part of a ring, then it rotates
the molecule so they are at the bottom, and then it removes the bond
once the coordinates are determined.

There are some special cases and exceptions for things with many
surface sites. Overall, though, this should make the adsorbates look
much better.
  • Loading branch information
rwest committed Nov 20, 2024
2 parents 43d5bf8 + fd5bdfa commit b30535b
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 23 deletions.
126 changes: 105 additions & 21 deletions rmgpy/molecule/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import math
import os.path
import re
import itertools

try:
import cairocffi as cairo
Expand All @@ -60,7 +61,8 @@
import numpy as np
from rdkit.Chem import AllChem

from rmgpy.molecule.molecule import Atom, Molecule
from rmgpy.molecule.molecule import Atom, Molecule, Bond
from rmgpy.molecule.pathfinder import find_shortest_path
from rmgpy.qm.molecule import Geometry


Expand Down Expand Up @@ -96,6 +98,12 @@ def create_new_surface(file_format, target=None, width=1024, height=768):

################################################################################

class AdsorbateDrawingError(Exception):
"""
When something goes wrong trying to draw an adsorbate.
"""
pass

class MoleculeDrawer(object):
"""
This class provides functionality for drawing the skeletal formula of
Expand Down Expand Up @@ -207,7 +215,16 @@ def draw(self, molecule, file_format, target=None):
# replace the bonds after generating coordinates. This avoids
# bugs with RDKit
old_bond_dictionary = self._make_single_bonds()
self._generate_coordinates()
if molecule.contains_surface_site():
try:
self._connect_surface_sites()
self._generate_coordinates()
self._disconnect_surface_sites()
except AdsorbateDrawingError as e:
self._disconnect_surface_sites()
self._generate_coordinates(fix_surface_sites=False)
else:
self._generate_coordinates()
self._replace_bonds(old_bond_dictionary)

# Generate labels to use
Expand Down Expand Up @@ -323,11 +340,13 @@ def _find_ring_groups(self):
if not found:
self.ringSystems.append([cycle])

def _generate_coordinates(self):
def _generate_coordinates(self, fix_surface_sites=True):
"""
Generate the 2D coordinates to be used when drawing the current
molecule. The function uses rdKits 2D coordinate generation.
Updates the self.coordinates Array in place.
If `fix_surface_sites` is True, then the surface sites are placed
at the bottom of the molecule.
"""
atoms = self.molecule.atoms
natoms = len(atoms)
Expand Down Expand Up @@ -390,7 +409,6 @@ def _generate_coordinates(self):
# If two atoms lie on top of each other, push them apart a bit
# This is ugly, but at least the mess you end up with isn't as misleading
# as leaving everything piled on top of each other at the origin
import itertools
for atom1, atom2 in itertools.combinations(backbone, 2):
i1, i2 = atoms.index(atom1), atoms.index(atom2)
if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
Expand All @@ -402,7 +420,6 @@ def _generate_coordinates(self):
# If two atoms lie on top of each other, push them apart a bit
# This is ugly, but at least the mess you end up with isn't as misleading
# as leaving everything piled on top of each other at the origin
import itertools
for atom1, atom2 in itertools.combinations(backbone, 2):
i1, i2 = atoms.index(atom1), atoms.index(atom2)
if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
Expand Down Expand Up @@ -457,26 +474,59 @@ def _generate_coordinates(self):
coordinates[:, 0] = temp[:, 1]
coordinates[:, 1] = temp[:, 0]

# For surface species, rotate them so the site is at the bottom.
if self.molecule.contains_surface_site():
# For surface species
if fix_surface_sites and self.molecule.contains_surface_site():
if len(self.molecule.atoms) == 1:
return coordinates
for site in self.molecule.atoms:
if site.is_surface_site():
break
else:
raise Exception("Can't find surface site")
if site.bonds:
adsorbate = next(iter(site.bonds))
vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :]
angle = math.atan2(vector0[0], vector0[1]) - math.pi
sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()]
if len(sites) == 1:
# rotate them so the site is at the bottom.
site = sites[0]
if site.bonds:
adatom = next(iter(site.bonds))
vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adatom), :]
angle = math.atan2(vector0[0], vector0[1]) - math.pi
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
self.coordinates = coordinates = np.dot(coordinates, rot)
else:
# van der Waals
index = atoms.index(site)
coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit
coordinates[index, 0] = coordinates[:, 0].mean() # and center it
elif len(sites) <= 4:
# Rotate so the line of best fit through the adatoms is horizontal.
# find atoms bonded to sites
adatoms = [next(iter(site.bonds)) for site in sites]
adatom_indices = [atoms.index(a) for a in adatoms]
# find the best fit line through the bonded atoms
x = coordinates[adatom_indices, 0]
y = coordinates[adatom_indices, 1]
A = np.vstack([x, np.ones(len(x))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0]
# rotate so the line is horizontal
angle = -math.atan(m)
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
self.coordinates = coordinates = np.dot(coordinates, rot)
# if the line is above the middle, flip it
not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()]
if coordinates[adatom_indices, 1].mean() > coordinates[not_site_indices, 1].mean():
coordinates[:, 1] *= -1
x = coordinates[adatom_indices, 0]
y = coordinates[adatom_indices, 1]
site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5)
if max(y) - site_y_pos > 1.5:
raise AdsorbateDrawingError("Adsorbate bond too long")
for x1, x2 in itertools.combinations(x, 2):
if abs(x1 - x2) < 0.2:
raise AdsorbateDrawingError("Sites overlapping")
for site, x_pos in zip(sites, x):
index = atoms.index(site)
coordinates[index, 1] = site_y_pos
coordinates[index, 0] = x_pos

else:
# van der waals
index = atoms.index(site)
coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit
coordinates[index, 0] = coordinates[:, 0].mean() # and center it
# more than 4 surface sites? leave them alone
pass

def _find_cyclic_backbone(self):
"""
Expand Down Expand Up @@ -854,7 +904,7 @@ def _generate_functional_group_coordinates(self, atom0, atom1):
# Check to see if atom1 is in any cycles in the molecule
ring_system = None
for ring_sys in self.ringSystems:
if any([atom1 in ring for ring in ring_sys]):
if any(atom1 in ring for ring in ring_sys):
ring_system = ring_sys

if ring_system is not None:
Expand Down Expand Up @@ -1624,6 +1674,40 @@ def _replace_bonds(self, bond_order_dictionary):
for bond, order in bond_order_dictionary.items():
bond.set_order_num(order)

def _connect_surface_sites(self):
"""
Creates single bonds between atoms that are surface sites.
This is to help make multidentate adsorbates look better.
"""
sites = [a for a in self.molecule.atoms if a.is_surface_site()]
if len(sites) > 4:
return
for site1 in sites:
other_sites = [a for a in sites if a != site1]
if not other_sites: break
# connect to the nearest site
site2 = min(other_sites, key=lambda a: len(find_shortest_path(site1, a)))
if len(find_shortest_path(site1, site2)) > 2 and len(sites) > 3:
# if there are more than 3 sites, don't connect sites that aren't neighbors
continue

bond = site1.bonds.get(site2)
if bond is None:
bond = Bond(site1, site2, 1)
site1.bonds[site2] = bond
site2.bonds[site1] = bond

def _disconnect_surface_sites(self):
"""
Removes all bonds between atoms that are surface sites.
"""
for site1 in self.molecule.atoms:
if site1.is_surface_site():
for site2 in list(site1.bonds.keys()): # make a list copy so we can delete from the dict
if site2.is_surface_site():
del site1.bonds[site2]
del site2.bonds[site1]


################################################################################

Expand Down
158 changes: 156 additions & 2 deletions test/rmgpy/molecule/drawTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@

import os
import os.path
import itertools


from rmgpy.molecule import Molecule
from rmgpy.molecule import Molecule, Atom, Bond
from rmgpy.molecule.draw import MoleculeDrawer
from rmgpy.species import Species

Expand Down Expand Up @@ -144,3 +144,157 @@ def test_draw_hydrogen_bond_adsorbate(self):
from cairo import PDFSurface
surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf")
assert isinstance(surface, PDFSurface)

def test_draw_bidentate_adsorbate(self):
try:
from cairocffi import ImageSurface
except ImportError:
from cairo import ImageSurface

test_molecules = [
Molecule().from_adjacency_list(
"""
1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
4 X u0 p0 c0 {1,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {1,S}
7 H u0 p0 c0 {2,S}
8 X u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {3,S}
"""),
Molecule().from_adjacency_list(
"""
1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
4 X u0 p0 c0 {1,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {1,S}
7 H u0 p0 c0 {2,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 X u0 p0 c0 {3,S}
"""),
]
for molecule in [Molecule(smiles="CC(=O)CCO"), Molecule(smiles="C1CC=CC1CC"), Molecule(smiles="C=CCC(O)=O")]:
bondable = [a for a in molecule.atoms if a.is_non_hydrogen() and any(b.is_hydrogen() for b in a.bonds)]
for b1, b2 in itertools.combinations(bondable, 2):
# find a hydrogen atom bonded to each of the two atoms
for h1 in b1.bonds:
if h1.is_hydrogen():
break
for h2 in b2.bonds:
if h2.is_hydrogen():
break
molecule.remove_atom(h1)
molecule.remove_atom(h2)
x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
molecule.add_atom(x1)
molecule.add_atom(x2)
molecule.add_bond(Bond(b1, x1, order=1))
molecule.add_bond(Bond(b2, x2, order=1))
test_molecules.append(molecule.copy(deep=True))
molecule.remove_atom(x1)
molecule.remove_atom(x2)
molecule.add_atom(h1)
molecule.add_atom(h2)
molecule.add_bond(Bond(b1, h1, order=1))
molecule.add_bond(Bond(b2, h2, order=1))

for b1, b2, b3 in itertools.combinations(bondable, 3):
# find a hydrogen atom bonded to each of the two atoms
for h1 in b1.bonds:
if h1.is_hydrogen():
break
for h2 in b2.bonds:
if h2.is_hydrogen():
break
for h3 in b3.bonds:
if h3.is_hydrogen():
break
molecule.remove_atom(h1)
molecule.remove_atom(h2)
molecule.remove_atom(h3)
x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
x3 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
molecule.add_atom(x1)
molecule.add_atom(x2)
molecule.add_atom(x3)
molecule.add_bond(Bond(b1, x1, order=1))
molecule.add_bond(Bond(b2, x2, order=1))
molecule.add_bond(Bond(b3, x3, order=1))
test_molecules.append(molecule.copy(deep=True))
molecule.remove_atom(x1)
molecule.remove_atom(x2)
molecule.remove_atom(x3)
molecule.add_atom(h1)
molecule.add_atom(h2)
molecule.add_atom(h3)
molecule.add_bond(Bond(b1, h1, order=1))
molecule.add_bond(Bond(b2, h2, order=1))
molecule.add_bond(Bond(b3, h3, order=1))

test_molecules.append(Molecule().from_adjacency_list(
"""
1 C u0 p0 c0 {2,S} {3,S} {9,S} {10,S}
2 X u0 p0 c0 {1,S}
3 C u0 p0 c0 {1,S} {4,S} {5,S} {11,S}
4 X u0 p0 c0 {3,S}
5 C u0 p0 c0 {3,S} {6,S} {7,S} {12,S}
6 X u0 p0 c0 {5,S}
7 C u0 p0 c0 {5,S} {8,S} {13,S} {14,S}
8 X u0 p0 c0 {7,S}
9 H u0 p0 c0 {1,S}
10 H u0 p0 c0 {1,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {5,S}
13 H u0 p0 c0 {7,S}
14 H u0 p0 c0 {7,S}
"""))
test_molecules.append(Molecule().from_adjacency_list(
"""
1 O u0 p2 c0 {4,S} {5,S}
2 O u0 p2 c0 {5,S} {6,S}
3 O u0 p2 c0 {6,S} {26,S}
4 C u0 p0 c0 {1,S} {7,S} {8,S} {19,S}
5 C u0 p0 c0 {1,S} {2,S} {10,S} {21,S}
6 C u0 p0 c0 {2,S} {3,S} {9,S} {20,S}
7 C u0 p0 c0 {4,S} {15,S} {16,S} {24,S}
8 C u0 p0 c0 {4,S} {17,S} {18,S} {25,S}
9 C u0 p0 c0 {6,S} {11,S} {12,S} {22,S}
10 C u0 p0 c0 {5,S} {13,S} {14,S} {23,S}
11 H u0 p0 c0 {9,S}
12 H u0 p0 c0 {9,S}
13 H u0 p0 c0 {10,S}
14 H u0 p0 c0 {10,S}
15 H u0 p0 c0 {7,S}
16 H u0 p0 c0 {7,S}
17 H u0 p0 c0 {8,S}
18 H u0 p0 c0 {8,S}
19 X u0 p0 c0 {4,S}
20 X u0 p0 c0 {6,S}
21 X u0 p0 c0 {5,S}
22 X u0 p0 c0 {9,S}
23 X u0 p0 c0 {10,S}
24 X u0 p0 c0 {7,S}
25 X u0 p0 c0 {8,S}
26 X u0 p0 c0 {3,S}
"""))
test_molecules.append(Molecule(smiles="*CC(*)(C*)OCC#*"))
test_molecules.append(Molecule(smiles="*CC(*)(C*)C*"))
for number, molecule in enumerate(test_molecules, 1):
path = f"test_polydentate_{number}.png"
if os.path.exists(path):
os.unlink(path)
self.drawer.clear()
surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path)
assert os.path.exists(path), "File doesn't exist"
os.unlink(path)
assert isinstance(surface, ImageSurface)

0 comments on commit b30535b

Please sign in to comment.