marrink-lab · fgrunewald · Oct 18, 2021 · Oct 22, 2021 · Nov 8, 2021 · Nov 8, 2021
diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py
@@ -74,8 +74,8 @@ def _initialize_cylces(topology, cycles, tolerance):
         for mol_idx in topology.mol_idx_by_name[mol_name]:
             # initalize as dfs tree
             molecule = topology.molecules[mol_idx]
-            if set(dict(nx.degree(molecule)).values()) != {1, 2}:
-                raise IOError("Only linear cyclic molecules are allowed at the moment.")
+            #if set(dict(nx.degree(molecule)).values()) != {1, 2}:
+            #    raise IOError("Only linear cyclic molecules are allowed at the moment.")
             molecule.dfs=True
             nodes = (list(molecule.search_tree.edges)[0][0],
                      list(molecule.search_tree.edges)[-1][1])

diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py
@@ -181,5 +181,49 @@ def get_all_predecessors(graph, node, start_node=0):
         predecessors.append(pre_node)
         if pre_node == start_node:
             break
+
     predecessors.reverse()
     return predecessors
+
+def _find_longest_path(graph, source, target):
+    all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
+    max_path_length = 0
+    idx = 0
+    for jdx, path in enumerate(all_paths):
+        if len(path) > max_path_length:
+            max_path_length = len(path)
+            idx = jdx
+    return all_paths[idx]
-    all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
-    max_path_length = 0
-    idx = 0
-    for jdx, path in enumerate(all_paths):
-        if len(path) > max_path_length:
-            max_path_length = len(path)
-            idx = jdx
-    return all_paths[idx]
+    all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
+    return max(all_paths, key=len)
-    all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
-    max_path_length = 0
-    idx = 0
-    for jdx, path in enumerate(all_paths):
-        if len(path) > max_path_length:
-            max_path_length = len(path)
-            idx = jdx
-    return all_paths[idx]
+    all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
+    return max(all_paths, key=len)
+
+def polyply_custom_tree(graph, source, target):
+    # find all simple paths from source to target
+    longest_path = _find_longest_path(graph, source, target)
+    path_nodes = { u for edge in longest_path for u in edge }
+    # substract path and extract the remaining graphs of the branches
+    graph_copy = graph.copy()
+    graph_copy.remove_edges_from(longest_path)
+    components = list(nx.connected_components(graph_copy))
+    # loop over the edges in the longest path and
+    # see if they are part of a connected component
+    # then add the bfs tree edges of that component to the
+    # graph before the edge
+    tree_graph = nx.DiGraph()
+    for from_node, to_node in longest_path:
+        for c in components:
+            if from_node in c:
+                break
+
+        subgraph = graph_copy.subgraph(c)
+        for u, v in nx.bfs_tree(subgraph, source=from_node).edges:
+            if u not in path_nodes or v not in path_nodes:
+                tree_graph.add_edge(u, v)
+        tree_graph.add_edge(from_node, to_node)
+
+    return tree_graph
+
+def annotate_hierarchy(graph):
+    hierarchy= {}
+    for idx, ndx in enumerate(graph.nodes()):
+        hierarchy[ndx] = idx
+    nx.set_node_attributes(graph, hierarchy, "hierarchy")
+    return graph
diff --git a/polyply/src/meta_molecule.py b/polyply/src/meta_molecule.py
@@ -18,7 +18,7 @@
 from vermouth.graph_utils import make_residue_graph
 from vermouth.log_helpers import StyleAdapter, get_logger
 from .polyply_parser import read_polyply
-from .graph_utils import find_nodes_with_attributes
+from .graph_utils import find_nodes_with_attributes, annotate_hierarchy
 
 Monomer = namedtuple('Monomer', 'resname, n_blocks')
 LOGGER = StyleAdapter(get_logger(__name__))
@@ -196,6 +196,7 @@ def search_tree(self):
             else:
                 self.__search_tree = nx.dfs_tree(self, source=self.root)
 
+        annotate_hierarchy(self.__search_tree)
         return self.__search_tree
 
     @staticmethod

diff --git a/polyply/src/restraints.py b/polyply/src/restraints.py
@@ -12,7 +12,51 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import networkx as nx
-from .graph_utils import compute_avg_step_length, get_all_predecessors
+from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors
+
+def upper_bound(avg_step_length, distance, graph_dist, tolerance=0):
+    bound = graph_dist * avg_step_length + distance + tolerance
+    return bound
+
+def lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance=0):
+    bound = avg_needed_step_length * graph_dist_ref - tolerance
+    return bound
+
+def _set_restraint_on_path(graph,
+                           path,
+                           avg_step_length,
+                           distance,
+                           tolerance,
+                           ref_node=None,
+                           target_node=None):
+
+    # graph distances can be computed from the path positions
+    graph_distances_ref = {node: path.index(node) for node in path}
+    graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}
+
+    if not target_node:
+        target_node = path[-1]
+
+    if not ref_node:
+        ref_node = path[0]
+
+    avg_needed_step_length = distance / graph_distances_target[ref_node]
+
+    for node in path[1:]:
+        if node == target_node:
+            graph_dist_ref = 1.0
+        else:
+            graph_dist_ref = graph_distances_target[node]
+
+        graph_dist_target = graph_distances_target[node]
+        up_bound = upper_bound(avg_step_length, distance, graph_dist_ref, tolerance)
+        low_bound = lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance)
+
+        current_restraints = graph.nodes[node].get('distance_restraints', [])
+        graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
+                                                                          up_bound,
+                                                                          low_bound)]
+
 
 def set_distance_restraint(molecule,
                            target_node,
@@ -50,37 +94,70 @@ def set_distance_restraint(molecule,
     tolerance: float
         absolute tolerance (nm)
     """
-    # if the target node is a predecssor to the ref node the order
-    # needs to be reveresed because the target node will be placed
-    # before the ref node
-    # we get the path lying between target and reference node
-    try:
-        path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)
-    except IndexError:
-        ref_node, target_node = target_node, ref_node
-        path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)
+    # First we need to figure out if the two nodes to be restrained are
+    # each others common ancestor. This breaks on cyclic graphs
+    ancestor = nx.algorithms.lowest_common_ancestor(molecule.search_tree, ref_node, target_node)
+    # if the ancestor is equal to the target node we have to switch the
+    # reference and target node
+    paths = []
+    if ancestor == target_node:
+        ref_nodes = [target_node]
+        target_nodes = [ref_node]
+        distances = [distance]
+    # if ancestor is neither target nor ref_node, there is no continous path
+    # between the two. In this case we have to split the distance restraint
+    # into two parts
+    elif ancestor != ref_node:
+        print("go here")
+        # if target node is to be placed before ref node we need to switch them around
+        # otherwise the designations are fine
+        if molecule.search_tree.nodes[ref_node]["hierarchy"] >\
+           molecule.search_tree.nodes[target_node]["hierarchy"]:
+            ref_node, target_node = (target_node, ref_node)
 
-    # graph distances can be computed from the path positions
-    graph_distances_ref = {node: path.index(node) for node in path}
-    graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}
+        ref_nodes = [ancestor, ancestor]
+        target_nodes = [ref_node, target_node]
 
-    for node in path:
-       if node == target_node:
-           graph_distance = 1.0
-       elif node == ref_node:
-           continue
-       else:
-           graph_distance = graph_distances_target[node]
+        # The first part of the split distance restraint is a new restraint
+        # that is the average expected distance between the ref node and the
+        # common ancestor
+        path = get_all_predecessors(molecule.search_tree,
+                                    node=ref_node,
+                                    start_node=ancestor)
 
-       upper_bound = graph_distance * avg_step_length + distance + tolerance
+        graph_dist_ref_target = len(path) - 1
+        up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target)
+        low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance)
+        partial_distance = (up_bound + low_bound) / 2.
+        paths.append(path)
+        distances = [partial_distance]
+        # then we put the other distance restraint that acts like the full one but
+        # on a partial path between ancestor and target node
+        path = get_all_predecessors(molecule.search_tree,
+                                    node=target_node,
+                                    start_node=ancestor)
+        paths.append(path)
+        distances.append(distance)
+    # if ancestor is equal to ref node all order is full-filled and we just proceed
+    else:
+        ref_nodes = [ref_node]
+        target_nodes = [target_node]
+        distances = [distance]
 
-       avg_needed_step_length = distance / graph_distances_target[ref_node]
-       lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance
+    if not paths:
+        paths = [get_all_predecessors(molecule.search_tree,
+                                      node=target_nodes[0],
+                                      start_node=ref_nodes[0])]
 
-       current_restraints = molecule.nodes[node].get('distance_restraints', [])
-       molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
-                                                                            upper_bound,
-                                                                            lower_bound)]
+    for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths):
+        print(ref_node, target_node, path)
+        _set_restraint_on_path(molecule,
+                               path,
+                               avg_step_length,
+                               dist,
+                               tolerance,
+                               ref_node,
+                               target_node)
 
 def set_restraints(topology, nonbond_matrix):
     """
@@ -96,9 +173,6 @@ def set_restraints(topology, nonbond_matrix):
         distance_restraints = topology.distance_restraints[(mol_name, mol_idx)]
         mol = topology.molecules[mol_idx]
 
-        if set(dict(nx.degree(mol)).values()) != {1, 2}:
-            raise IOError("Distance restraints currently can only be applied to linear molecules.")
-
         for ref_node, target_node in distance_restraints:
             path = list(mol.search_tree.edges)
             avg_step_length, _ = compute_avg_step_length(mol,
@@ -107,4 +181,9 @@ def set_restraints(topology, nonbond_matrix):
                                                          path)
 
             distance, tolerance = distance_restraints[(ref_node, target_node)]
-            set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance)
+            set_distance_restraint(mol,
+                                   target_node,
+                                   ref_node,
+                                   distance,
+                                   avg_step_length,
+                                   tolerance)
diff --git a/polyply/tests/test_restraints.py b/polyply/tests/test_restraints.py
@@ -17,8 +17,12 @@
 import pytest
 import numpy as np
 import networkx as nx
+from hypothesis import given
+from hypothesis_networkx import graph_builder
+from hypothesis import strategies as st
 import polyply
 from polyply.tests.test_build_file_parser import test_molecule
+from polyply.src.meta_molecule import MetaMolecule
 
 @pytest.mark.parametrize('target_node, ref_node, distance, avg_step_length, tolerance, expected',(
     # test simple revers
@@ -67,3 +71,35 @@ def test_set_distance_restraint(test_molecule,
             assert pytest.approx(ref_restr[1], new_restr[1])
             assert pytest.approx(ref_restr[2], new_restr[2])
 
+
+
+
+builder = graph_builder(graph_type=nx.Graph,
+                        node_keys=st.integers(),
+                        min_nodes=4, max_nodes=60,
+                        min_edges=3, max_edges=None,
+                        self_loops=True,
+                        connected=True)
+@given(graph=builder)
+def test_restraints_on_abitr_topology(graph):
+    test_molecule = MetaMolecule()
+    test_molecule.add_nodes_from(graph.nodes)
+    test_molecule.add_edges_from(graph.edges)
+
+    ref_node, target_node = np.random.choice(list(graph.nodes), replace=False, size=2)
+
+  # if set(dict(nx.degree(test_molecule)).values()) != {1, 2}:
+  #     ancestor = nx.algorithms.lowest_common_ancestor(test_molecule.search_tree, ref_node, target_node)
+  #     if ancestor != ref_node and ancestor != target_node:
+  #         assert False
+
+    distance = 4
+    avg_step_length = 0.47
+    tolerance = 0
+    polyply.src.restraints.set_distance_restraint(test_molecule,
+                                                  target_node,
+                                                  ref_node,
+                                                  distance,
+                                                  avg_step_length,
+                                                  tolerance)
+    assert len(nx.get_node_attributes(test_molecule, "distance_restraints")) != 0
diff --git a/requirements-tests.txt b/requirements-tests.txt
@@ -4,3 +4,4 @@ pytest-cov
 pylint
 codecov
 tqdm
+hypothesis-networkx
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,4 @@ pytest-cov @@
     pylint
     codecov
     tqdm
+    hypothesis-networkx