Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First version of coordinate generatign processor #328

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions vermouth/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@
from .sort_molecule_atoms import SortMoleculeAtoms
from .merge_all_molecules import MergeAllMolecules
from .annotate_mut_mod import AnnotateMutMod
from .generate_coordinates import GenerateCoordinates
368 changes: 368 additions & 0 deletions vermouth/processors/generate_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides a processor for building missing coordinates.
"""

import numpy as np
import networkx as nx

from .processor import Processor
from .. import selectors


def mindist(A, B):
'''
Determine the smallest distance between two point sets (np.ndarray) A and B
'''
return ((A[:, None] - B[None])**2).sum(axis=2).min(axis=1)


def get_atoms_missing_coords(molecule):
'''
Determine particles without coordinates in molecule.
'''
return [
ix for ix in molecule
if not selectors.selector_has_position(molecule.nodes[ix])
]


def get_anchors(molecule, indices):
'''
Determine particles with known coordinates connected to
particles with given indices.
Comment on lines +46 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring needs improving

'''
return {
a for ix in indices for a in molecule[ix].keys()
if selectors.selector_has_position(molecule.nodes[a])
}


def align_z(v):
'''
Generate rotation matrix aligning z-axis onto vector v (and vice-versa).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it an idea to rename v to vector here? It's a bit of a nitpick from my side, admittedly

'''
# This routine is based on the notion that for any two
# normalized vectors, the alignment is a 180 degree
# rotation around the resultant vector.
Comment on lines +60 to +61
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is so good I wonder if it should be part of the docstring.


w = v / (v ** 2).sum() ** 0.5

if w[2] <= 1e-8 - 1:
return -np.eye(3) # Mind the inversion ...

w[2] += 1

return (2 / (w**2).sum()) * w * w[:, None] - np.eye(3)


def conicring(points=24, distance=0.125, angle=109.4, axis=None):
'''
Create a ring of positions corresponding to a cone with given angle.
An angle of 109.4 is suitable for ideal SP3 substituents.
'''
p = 2 * np.pi * np.arange(points) / points
r = distance * np.sin(angle * np.pi / 360)
z = distance * np.cos(angle * np.pi / 360)
X = np.stack([r * np.cos(p), r * np.sin(p), z * np.ones(points)], axis=1)
if axis is None:
return X
return X @ align_z(axis)


def double_cone(points=24, distance=0.125, angle=109.4, axis=None):
'''Create positions for two consecutive (SP3) bonded particles'''
P = conicring(points, distance, angle, axis)
S = np.array([P @ align_z(x) + x for x in P])
return P, S


def triple_cone(points=24, distance=0.125, angle=109.4):
'''Create possible positions for three consecutive (SP3) bonded particles'''
P, S = double_cone(points, distance, angle)
T = np.array([[[P @ align_z(u - x) + u] for u in U] for x, U in zip(P, S)])
return P, S, T


def _out(molecule, anchor, base, distance=0.125):
'''
Generate coordinates for atom bonded to anchor
using resultant vector of known substituents.

This approach works for placement of single unknown
substituents on sp1, sp2 or sp3 atoms. It should also
work for bipyramidal and other configurations.

b1
\
b2--a-->X
/
b3
Comment on lines +109 to +114
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice if the variable names would at least be matching the drawing.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concur.


X := position to determine
a := position of anchoring particle
B = [b1, b2, ...] := positions of base particles

Parameters
----------
molecule: vermouth.molecule.Molecule
A molecule with missing atoms
anchor: integer
Index to anchor node: a node with positions connected to a node without
base: list of integers
Indices to particles with positions connected to anchor
distance: float
Distance of target position from anchor

Returns
-------
position
'''
a = molecule.nodes[anchor]['position']
B = [ molecule.nodes[ix]['position'] for ix in base ]
B = B - a
u = -B.sum(axis=0)
u *= distance / ((u ** 2).sum() ** 0.5)
return a + u


def _chiral(molecule, anchor, base, distance=(0.125, 0.125)):
'''
Generate coordinates for atoms bonded to chiral center
based on two known substituents.

This approach uses the positions of two substituents to
generate coordinates for one or two atoms connected to
a chiral center. The two atoms are indicated as `up` and
`down`, referring to the direction with respect to the
cross-product of the two substituents. If these control
atoms are not given explicitly in order, then the control
atoms are inferred from the connections of the anchor with
known coordinates. In that case, the specific chirality
of the center (R or S) cannot be controlled.

NOTE: This function should probably be able to determine
the weights of the substituents to allow placement
based on R or S chirality.

Parameters
----------
molecule: vermouth.molecule.Molecule
A molecule with missing atoms
anchor: integer
Index to anchor node: a node with positions connected to a node without
base: list of integers
Indices to particles with positions connected to anchor
distance: tuple of two floats
Distances of target positions from anchor

Returns
-------
tuple of positions (up, down)
'''
a = molecule.nodes[anchor]['position']
B = [ molecule.nodes[ix]['position'] for ix in base ]
B = B - a
# Negative resultant vector
u = -0.5 * (B[0] + B[1])
v = np.cross(B[0], B[1])
# Set length of v to length to half distance between subs
v = 0.5 * ((B[0] - B[1])**2).sum()**0.5 * v / (v ** 2).sum()**0.5
uv = u + v
# Normalization of vector sum
n = 1 / (uv**2).sum()**0.5

rup, rdown = distance

pup = a + rup * n * uv
pdown = a + rdown * n * (u - v)

return pup, pdown


class Segment:
'''Class for subgraphs of (missing) atoms with anchors'''
def __init__(self, molecule, limbs):
self.molecule = molecule
self.anchors = limbs[0][0]
self.bases = limbs[0][1]
Comment on lines +198 to +202
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Especially, needs doc-string of what is what

self.limbs = [ s[2] for s in limbs ]

def __str__(self):
return ':'.join([str(self.anchors), str(self.bases), str(self.limbs)])

def extrude(self, distance=0.125, coords=None, contact=0.5):
'''
Generate positions for particles without, connected to anchors.
'''
mol = self.molecule

for i, (a, b) in enumerate(zip(self.anchors, self.bases)):
target = [ k for k in mol.neighbors(a) if k not in b ]
valence = len(target) + len(b)

if valence > 2 and len(b) == valence - 1:
# Trivial chiral/flat/bipyramidal/octahedral/...
mol.nodes[target[0]]['position'] = _out(mol, anchor=a, base=b)
for k in self.limbs:
k.discard(target[0])
# Simply update this segment and return it
self.bases[i] = [a]
self.anchors[i] = target[0]
return [self]

if valence == 4 and len(b) == 2:
# Trivial chiral - except for the actual chirality
# a simple 'up' or 'down' flag would be helpful
# amino acid side chain is 'up'
up, down = _chiral(mol, anchor=a, base=b)
mol.nodes[target[0]]['position'] = up
mol.nodes[target[1]]['position'] = down
# Simply update this segment and return it
for k in self.limbs:
k.discard(target[0])
self.bases[i] = [a]
self.anchors[i] = target[0]
return [self]

if valence == 1:
# Generate a sphere of points
# Check for overlaps
# Take a pick
# Should be done last anyway
# Best collected and stored
continue

# So a valence of 2 (missing 1), 3 (missing 2) or 4 (missing 3)
# The cases 2(1), 3(2), 4(3) can be solved by sampling possible
# candidate points from a conical set.

if len(b) == 1 and all(len(s) < 100 for s in self.limbs):
ax = mol.nodes[a]['position']
bx = mol.nodes[b[0]]['position']
P = conicring(axis=ax-bx) + ax
if coords is not None:
P[mindist(P, coords) < contact**2] = np.nan
P = P.reshape((len(self.limbs), -1, 3))
for px in P.transpose((1,0,2)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Little bit of lint

if px.max() == np.nan:
continue
# Split the limbs to new segments and return those
#out = []
for t, pi in zip(target, px):
self.molecule.nodes[t]['position'] = pi
# for k in self.limbs:
# if len(k) > 1 and t in k:
# k.discard(t)
# for g in nx.connected_components(self.molecule.subgraph(k)):
# out.append(
# Segment(
# self.molecule,
# [(t, [[a]], g)],
# self.coords,
# self.contact
# )
# )
# This commented-out stuff may be useful for speeding up
# coordinate generation, because it avoids needing lookup
# of missing coordinates in subsequent cycles.
#return out
return
break
return None


class GenerateCoordinates(Processor):
"""
A processor for generating coordinates for atoms without.
"""
def __init__(self, contact=0.5):
super().__init__()
self.contact = contact
self.coordinates = None

def run_system(self, system):
"""
Process `system`.

Parameters
----------
system: vermouth.system.System
The system to process. Is modified in-place.
"""

self.coordinates = np.stack(
[
a.get('position', (np.nan, np.nan, np.nan))
for m in system.molecules
for i, a in m.atoms
]
)

for molecule in system.molecules:
self.run_molecule(molecule)

def run_molecule(self, molecule):
"""
Process a single molecule. Must be implemented by subclasses.

Parameters
----------
molecule: vermouth.molecule.Molecule
The molecule to process.

Returns
-------
vermouth.molecule.Molecule
The provided molecule with complete coordinates
"""

# Missing atoms - those without 'positions'
missing = get_atoms_missing_coords(molecule)

prev = len(molecule)
while prev - len(missing) > 0:
# Limbs: subgraphs of missing atoms with anchors and bases
limbs = []
for g in nx.connected_components(molecule.subgraph(missing)):
anchors = list(get_anchors(molecule, g))
bases = [ list(get_anchors(molecule, [a])) for a in anchors ]
pckroon marked this conversation as resolved.
Show resolved Hide resolved
limbs.append((anchors, bases, g))
limbs.sort()

# Segment: subgraphs of limbs sharing anchors and bases
segments = []
prev = None
for lim in limbs:
if lim[0] != prev:
segments.append([])
prev = lim[0]
segments[-1].append(lim)
segments = [ Segment(molecule, limbs) for limbs in segments ]

# Extrude segments one layer at a time
while segments:
s = segments.pop(0)
r = s.extrude(coords=self.coordinates, contact=self.contact)
if r is not None:
segments.extend(r)

prev = len(missing)
missing = get_atoms_missing_coords(molecule)
#print(len(missing))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leftover


return molecule