Merge PR #2698 branch 'drawbidentate'.

Adsorbates with more than one surface site were drawn weirdly, with the second or third surface site often floating up in the air. With this change, it temporarily forms a bond between the surface sites, so the layout algorithm sees them as part of a ring, then it rotates the molecule so they are at the bottom, and then it removes the bond once the coordinates are determined. There are some special cases and exceptions for things with many surface sites. Overall, though, this should make the adsorbates look much better.
ReactionMechanismGenerator · Nov 20, 2024 · b30535b · b30535b
2 parents 43d5bf8 + fd5bdfa
commit b30535b
Show file tree

Hide file tree

Showing 2 changed files with 261 additions and 23 deletions.
diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py
@@ -49,6 +49,7 @@
 import math
 import os.path
 import re
+import itertools
 
 try:
     import cairocffi as cairo
@@ -60,7 +61,8 @@
 import numpy as np
 from rdkit.Chem import AllChem
 
-from rmgpy.molecule.molecule import Atom, Molecule
+from rmgpy.molecule.molecule import Atom, Molecule, Bond
+from rmgpy.molecule.pathfinder import find_shortest_path
 from rmgpy.qm.molecule import Geometry
 
 
@@ -96,6 +98,12 @@ def create_new_surface(file_format, target=None, width=1024, height=768):
 
 ################################################################################
 
+class AdsorbateDrawingError(Exception):
+    """
+    When something goes wrong trying to draw an adsorbate.
+    """
+    pass
+
 class MoleculeDrawer(object):
     """
     This class provides functionality for drawing the skeletal formula of
@@ -207,7 +215,16 @@ def draw(self, molecule, file_format, target=None):
                 # replace the bonds after generating coordinates. This avoids
                 # bugs with RDKit
                 old_bond_dictionary = self._make_single_bonds()
-                self._generate_coordinates()
+                if molecule.contains_surface_site():
+                    try:
+                        self._connect_surface_sites()
+                        self._generate_coordinates()
+                        self._disconnect_surface_sites()
+                    except AdsorbateDrawingError as e:
+                        self._disconnect_surface_sites()
+                        self._generate_coordinates(fix_surface_sites=False)
+                else:
+                    self._generate_coordinates()
                 self._replace_bonds(old_bond_dictionary)
 
                 # Generate labels to use
@@ -323,11 +340,13 @@ def _find_ring_groups(self):
                 if not found:
                     self.ringSystems.append([cycle])
 
-    def _generate_coordinates(self):
+    def _generate_coordinates(self, fix_surface_sites=True):
         """
         Generate the 2D coordinates to be used when drawing the current 
         molecule. The function uses rdKits 2D coordinate generation.
         Updates the self.coordinates Array in place.
+        If `fix_surface_sites` is True, then the surface sites are placed
+        at the bottom of the molecule.
         """
         atoms = self.molecule.atoms
         natoms = len(atoms)
@@ -390,7 +409,6 @@ def _generate_coordinates(self):
             # If two atoms lie on top of each other, push them apart a bit
             # This is ugly, but at least the mess you end up with isn't as misleading
             # as leaving everything piled on top of each other at the origin
-            import itertools
             for atom1, atom2 in itertools.combinations(backbone, 2):
                 i1, i2 = atoms.index(atom1), atoms.index(atom2)
                 if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
@@ -402,7 +420,6 @@ def _generate_coordinates(self):
             # If two atoms lie on top of each other, push them apart a bit
             # This is ugly, but at least the mess you end up with isn't as misleading
             # as leaving everything piled on top of each other at the origin
-            import itertools
             for atom1, atom2 in itertools.combinations(backbone, 2):
                 i1, i2 = atoms.index(atom1), atoms.index(atom2)
                 if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
@@ -457,26 +474,59 @@ def _generate_coordinates(self):
                 coordinates[:, 0] = temp[:, 1]
                 coordinates[:, 1] = temp[:, 0]
 
-        # For surface species, rotate them so the site is at the bottom.
-        if self.molecule.contains_surface_site():
+        # For surface species
+        if fix_surface_sites and self.molecule.contains_surface_site():
             if len(self.molecule.atoms) == 1:
                 return coordinates
-            for site in self.molecule.atoms:
-                if site.is_surface_site():
-                    break
-            else:
-                raise Exception("Can't find surface site")
-            if site.bonds:
-                adsorbate = next(iter(site.bonds))
-                vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :]
-                angle = math.atan2(vector0[0], vector0[1]) - math.pi
+            sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()]
+            if len(sites) == 1:
+                # rotate them so the site is at the bottom.
+                site = sites[0]
+                if site.bonds:
+                    adatom = next(iter(site.bonds))
+                    vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adatom), :]
+                    angle = math.atan2(vector0[0], vector0[1]) - math.pi
+                    rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
+                    self.coordinates = coordinates = np.dot(coordinates, rot)
+                else:
+                    # van der Waals
+                    index = atoms.index(site)
+                    coordinates[index, 1] = min(coordinates[:, 1]) - 0.8  # just move the site down a bit
+                    coordinates[index, 0] = coordinates[:, 0].mean()  # and center it
+            elif len(sites) <= 4:
+                # Rotate so the line of best fit through the adatoms is horizontal.
+                # find atoms bonded to sites
+                adatoms = [next(iter(site.bonds)) for site in sites]
+                adatom_indices = [atoms.index(a) for a in adatoms]
+                # find the best fit line through the bonded atoms
+                x = coordinates[adatom_indices, 0]
+                y = coordinates[adatom_indices, 1]
+                A = np.vstack([x, np.ones(len(x))]).T
+                m, c = np.linalg.lstsq(A, y, rcond=None)[0]
+                # rotate so the line is horizontal
+                angle = -math.atan(m)
                 rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
                 self.coordinates = coordinates = np.dot(coordinates, rot)
+                # if the line is above the middle, flip it
+                not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()]
+                if coordinates[adatom_indices, 1].mean() > coordinates[not_site_indices, 1].mean():
+                    coordinates[:, 1] *= -1
+                x = coordinates[adatom_indices, 0]
+                y = coordinates[adatom_indices, 1]
+                site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5)
+                if max(y) - site_y_pos > 1.5:
+                    raise AdsorbateDrawingError("Adsorbate bond too long")
+                for x1, x2 in itertools.combinations(x, 2):
+                    if abs(x1 - x2) < 0.2:
+                        raise AdsorbateDrawingError("Sites overlapping")
+                for site, x_pos in zip(sites, x):
+                    index = atoms.index(site)
+                    coordinates[index, 1] = site_y_pos
+                    coordinates[index, 0] = x_pos
+
             else:
-                # van der waals
-                index = atoms.index(site)
-                coordinates[index, 1] = min(coordinates[:, 1]) - 0.8  # just move the site down a bit
-                coordinates[index, 0] = coordinates[:, 0].mean()  # and center it
+                # more than 4 surface sites? leave them alone
+                pass
 
     def _find_cyclic_backbone(self):
         """
@@ -854,7 +904,7 @@ def _generate_functional_group_coordinates(self, atom0, atom1):
         # Check to see if atom1 is in any cycles in the molecule
         ring_system = None
         for ring_sys in self.ringSystems:
-            if any([atom1 in ring for ring in ring_sys]):
+            if any(atom1 in ring for ring in ring_sys):
                 ring_system = ring_sys
 
         if ring_system is not None:
@@ -1624,6 +1674,40 @@ def _replace_bonds(self, bond_order_dictionary):
         for bond, order in bond_order_dictionary.items():
             bond.set_order_num(order)
 
+    def _connect_surface_sites(self):
+        """
+        Creates single bonds between atoms that are surface sites.
+        This is to help make multidentate adsorbates look better.
+        """
+        sites = [a for a in self.molecule.atoms if a.is_surface_site()]
+        if len(sites) > 4:
+            return
+        for site1 in sites:
+            other_sites = [a for a in sites if a != site1]
+            if not other_sites: break
+            # connect to the nearest site
+            site2 = min(other_sites, key=lambda a: len(find_shortest_path(site1, a)))
+            if len(find_shortest_path(site1, site2)) > 2 and len(sites) > 3:
+                # if there are more than 3 sites, don't connect sites that aren't neighbors
+                continue
+
+            bond = site1.bonds.get(site2)
+            if bond is None:
+                bond = Bond(site1, site2, 1)
+                site1.bonds[site2] = bond
+                site2.bonds[site1] = bond
+
+    def _disconnect_surface_sites(self):
+        """
+        Removes all bonds between atoms that are surface sites.
+        """
+        for site1 in self.molecule.atoms:
+            if site1.is_surface_site():
+                for site2 in list(site1.bonds.keys()): # make a list copy so we can delete from the dict
+                    if site2.is_surface_site():
+                        del site1.bonds[site2]
+                        del site2.bonds[site1]
+
 
 ################################################################################
 

diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py
@@ -33,9 +33,9 @@
 
 import os
 import os.path
+import itertools
 
-
-from rmgpy.molecule import Molecule
+from rmgpy.molecule import Molecule, Atom, Bond
 from rmgpy.molecule.draw import MoleculeDrawer
 from rmgpy.species import Species
 
@@ -144,3 +144,157 @@ def test_draw_hydrogen_bond_adsorbate(self):
             from cairo import PDFSurface
         surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf")
         assert isinstance(surface, PDFSurface)
+
+    def test_draw_bidentate_adsorbate(self):
+        try:
+            from cairocffi import ImageSurface
+        except ImportError:
+            from cairo import ImageSurface
+
+        test_molecules = [
+            Molecule().from_adjacency_list(
+            """
+1  C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
+2  C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
+3  C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
+4  X u0 p0 c0 {1,S}
+5  H u0 p0 c0 {1,S}
+6  H u0 p0 c0 {1,S}
+7  H u0 p0 c0 {2,S}
+8  X u0 p0 c0 {2,S}
+9  H u0 p0 c0 {3,S}
+10 H u0 p0 c0 {3,S}
+11 H u0 p0 c0 {3,S}
+        """),
+            Molecule().from_adjacency_list(
+"""
+1  C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
+2  C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
+3  C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
+4  X u0 p0 c0 {1,S}
+5  H u0 p0 c0 {1,S}
+6  H u0 p0 c0 {1,S}
+7  H u0 p0 c0 {2,S}
+8  H u0 p0 c0 {2,S}
+9  H u0 p0 c0 {3,S}
+10 H u0 p0 c0 {3,S}
+11 X u0 p0 c0 {3,S}
+"""),
+        ]
+        for molecule in [Molecule(smiles="CC(=O)CCO"), Molecule(smiles="C1CC=CC1CC"), Molecule(smiles="C=CCC(O)=O")]:
+            bondable = [a for a in molecule.atoms if a.is_non_hydrogen() and any(b.is_hydrogen() for b in a.bonds)]
+            for b1, b2 in itertools.combinations(bondable, 2):
+                # find a hydrogen atom bonded to each of the two atoms
+                for h1 in b1.bonds:
+                    if h1.is_hydrogen():
+                        break
+                for h2 in b2.bonds:
+                    if h2.is_hydrogen():
+                        break
+                molecule.remove_atom(h1)
+                molecule.remove_atom(h2)
+                x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
+                x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
+                molecule.add_atom(x1)
+                molecule.add_atom(x2)
+                molecule.add_bond(Bond(b1, x1, order=1))
+                molecule.add_bond(Bond(b2, x2, order=1))
+                test_molecules.append(molecule.copy(deep=True))
+                molecule.remove_atom(x1)
+                molecule.remove_atom(x2)
+                molecule.add_atom(h1)
+                molecule.add_atom(h2)
+                molecule.add_bond(Bond(b1, h1, order=1))
+                molecule.add_bond(Bond(b2, h2, order=1))
+
+            for b1, b2, b3 in itertools.combinations(bondable, 3):
+                # find a hydrogen atom bonded to each of the two atoms
+                for h1 in b1.bonds:
+                    if h1.is_hydrogen():
+                        break
+                for h2 in b2.bonds:
+                    if h2.is_hydrogen():
+                        break
+                for h3 in b3.bonds:
+                    if h3.is_hydrogen():
+                        break
+                molecule.remove_atom(h1)
+                molecule.remove_atom(h2)
+                molecule.remove_atom(h3)
+                x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
+                x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
+                x3 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
+                molecule.add_atom(x1)
+                molecule.add_atom(x2)
+                molecule.add_atom(x3)
+                molecule.add_bond(Bond(b1, x1, order=1))
+                molecule.add_bond(Bond(b2, x2, order=1))
+                molecule.add_bond(Bond(b3, x3, order=1))
+                test_molecules.append(molecule.copy(deep=True))
+                molecule.remove_atom(x1)
+                molecule.remove_atom(x2)
+                molecule.remove_atom(x3)
+                molecule.add_atom(h1)
+                molecule.add_atom(h2)
+                molecule.add_atom(h3)
+                molecule.add_bond(Bond(b1, h1, order=1))
+                molecule.add_bond(Bond(b2, h2, order=1))
+                molecule.add_bond(Bond(b3, h3, order=1))
+
+        test_molecules.append(Molecule().from_adjacency_list(
+"""
+1  C  u0 p0 c0 {2,S} {3,S} {9,S} {10,S}
+2  X u0 p0 c0 {1,S}
+3  C  u0 p0 c0 {1,S} {4,S} {5,S} {11,S}
+4  X u0 p0 c0 {3,S}
+5  C  u0 p0 c0 {3,S} {6,S} {7,S} {12,S}
+6  X u0 p0 c0 {5,S}
+7  C  u0 p0 c0 {5,S} {8,S} {13,S} {14,S}
+8  X u0 p0 c0 {7,S}
+9  H  u0 p0 c0 {1,S}
+10 H  u0 p0 c0 {1,S}
+11 H  u0 p0 c0 {3,S}
+12 H  u0 p0 c0 {5,S}
+13 H  u0 p0 c0 {7,S}
+14 H  u0 p0 c0 {7,S}
+"""))
+        test_molecules.append(Molecule().from_adjacency_list(
+"""
+1 O u0 p2 c0 {4,S} {5,S}
+2 O u0 p2 c0 {5,S} {6,S}
+3 O u0 p2 c0 {6,S} {26,S}
+4 C u0 p0 c0 {1,S} {7,S} {8,S} {19,S}
+5 C u0 p0 c0 {1,S} {2,S} {10,S} {21,S}
+6 C u0 p0 c0 {2,S} {3,S} {9,S} {20,S}
+7 C u0 p0 c0 {4,S} {15,S} {16,S} {24,S}
+8 C u0 p0 c0 {4,S} {17,S} {18,S} {25,S}
+9 C u0 p0 c0 {6,S} {11,S} {12,S} {22,S}
+10 C u0 p0 c0 {5,S} {13,S} {14,S} {23,S}
+11 H u0 p0 c0 {9,S}
+12 H u0 p0 c0 {9,S}
+13 H u0 p0 c0 {10,S}
+14 H u0 p0 c0 {10,S}
+15 H u0 p0 c0 {7,S}
+16 H u0 p0 c0 {7,S}
+17 H u0 p0 c0 {8,S}
+18 H u0 p0 c0 {8,S}
+19 X u0 p0 c0 {4,S}
+20 X u0 p0 c0 {6,S}
+21 X u0 p0 c0 {5,S}
+22 X u0 p0 c0 {9,S}
+23 X u0 p0 c0 {10,S}
+24 X u0 p0 c0 {7,S}
+25 X u0 p0 c0 {8,S}
+26 X u0 p0 c0 {3,S}
+"""))
+        test_molecules.append(Molecule(smiles="*CC(*)(C*)OCC#*"))
+        test_molecules.append(Molecule(smiles="*CC(*)(C*)C*"))
+        for number, molecule in enumerate(test_molecules, 1):
+            path = f"test_polydentate_{number}.png"
+            if os.path.exists(path):
+                os.unlink(path)
+            self.drawer.clear()
+            surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path)
+            assert os.path.exists(path), "File doesn't exist"
+            os.unlink(path)
+            assert isinstance(surface, ImageSurface)