Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/157 #161

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions polyply/src/gen_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def _initialize_cylces(topology, cycles, tolerance):
for mol_idx in topology.mol_idx_by_name[mol_name]:
# initalize as dfs tree
molecule = topology.molecules[mol_idx]
if set(dict(nx.degree(molecule)).values()) != {1, 2}:
raise IOError("Only linear cyclic molecules are allowed at the moment.")
#if set(dict(nx.degree(molecule)).values()) != {1, 2}:
# raise IOError("Only linear cyclic molecules are allowed at the moment.")
molecule.dfs=True
nodes = (list(molecule.search_tree.edges)[0][0],
list(molecule.search_tree.edges)[-1][1])
Expand Down
44 changes: 44 additions & 0 deletions polyply/src/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,5 +181,49 @@ def get_all_predecessors(graph, node, start_node=0):
predecessors.append(pre_node)
if pre_node == start_node:
break

predecessors.reverse()
return predecessors

def _find_longest_path(graph, source, target):
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
max_path_length = 0
idx = 0
for jdx, path in enumerate(all_paths):
if len(path) > max_path_length:
max_path_length = len(path)
idx = jdx
return all_paths[idx]
Comment on lines +189 to +196
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
max_path_length = 0
idx = 0
for jdx, path in enumerate(all_paths):
if len(path) > max_path_length:
max_path_length = len(path)
idx = jdx
return all_paths[idx]
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target))
return max(all_paths, key=len)

Unless you want to deal with multiple longest paths

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to deal with it in the sense that there can be more than one with equivalent path length. But the smaller code should account for that I guess?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Then you should get the one with the lowest index, but don't pin me on that.


def polyply_custom_tree(graph, source, target):
# find all simple paths from source to target
longest_path = _find_longest_path(graph, source, target)
path_nodes = { u for edge in longest_path for u in edge }
# substract path and extract the remaining graphs of the branches
graph_copy = graph.copy()
graph_copy.remove_edges_from(longest_path)
components = list(nx.connected_components(graph_copy))
# loop over the edges in the longest path and
# see if they are part of a connected component
# then add the bfs tree edges of that component to the
# graph before the edge
tree_graph = nx.DiGraph()
for from_node, to_node in longest_path:
for c in components:
if from_node in c:
break

subgraph = graph_copy.subgraph(c)
for u, v in nx.bfs_tree(subgraph, source=from_node).edges:
if u not in path_nodes or v not in path_nodes:
tree_graph.add_edge(u, v)
tree_graph.add_edge(from_node, to_node)

return tree_graph
Comment on lines +198 to +222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes but that is intended. The polyply search tree should be a directed graph without cycles. The tree should have been a separate PR but since it made it in. Basically the idea is to get rid of the dfs vs bfs thing for cycles. To do that I need to make a tree which essentially is a DFS tree along the backbone (i.e. longest path) but the branches are placed bfs like with cycles being replaced by distance restraints.


def annotate_hierarchy(graph):
hierarchy= {}
for idx, ndx in enumerate(graph.nodes()):
hierarchy[ndx] = idx
nx.set_node_attributes(graph, hierarchy, "hierarchy")
return graph
3 changes: 2 additions & 1 deletion polyply/src/meta_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from vermouth.graph_utils import make_residue_graph
from vermouth.log_helpers import StyleAdapter, get_logger
from .polyply_parser import read_polyply
from .graph_utils import find_nodes_with_attributes
from .graph_utils import find_nodes_with_attributes, annotate_hierarchy

Monomer = namedtuple('Monomer', 'resname, n_blocks')
LOGGER = StyleAdapter(get_logger(__name__))
Expand Down Expand Up @@ -196,6 +196,7 @@ def search_tree(self):
else:
self.__search_tree = nx.dfs_tree(self, source=self.root)

annotate_hierarchy(self.__search_tree)
return self.__search_tree

@staticmethod
Expand Down
141 changes: 110 additions & 31 deletions polyply/src/restraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
from .graph_utils import compute_avg_step_length, get_all_predecessors
from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors

def upper_bound(avg_step_length, distance, graph_dist, tolerance=0):
bound = graph_dist * avg_step_length + distance + tolerance
return bound

def lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance=0):
bound = avg_needed_step_length * graph_dist_ref - tolerance
return bound

def _set_restraint_on_path(graph,
path,
avg_step_length,
distance,
tolerance,
ref_node=None,
target_node=None):

# graph distances can be computed from the path positions
graph_distances_ref = {node: path.index(node) for node in path}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expensive! path.index costs N for the N=len(path)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but its cheaper than computing the disjstra path length no?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. Consider however, the following:
{node: idx for (idx, node) in enumerate(path)}

graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}

if not target_node:
target_node = path[-1]

if not ref_node:
ref_node = path[0]

avg_needed_step_length = distance / graph_distances_target[ref_node]

for node in path[1:]:
if node == target_node:
graph_dist_ref = 1.0
else:
graph_dist_ref = graph_distances_target[node]

graph_dist_target = graph_distances_target[node]
up_bound = upper_bound(avg_step_length, distance, graph_dist_ref, tolerance)
low_bound = lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance)

current_restraints = graph.nodes[node].get('distance_restraints', [])
graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
up_bound,
low_bound)]


def set_distance_restraint(molecule,
target_node,
Expand Down Expand Up @@ -50,37 +94,70 @@ def set_distance_restraint(molecule,
tolerance: float
absolute tolerance (nm)
"""
# if the target node is a predecssor to the ref node the order
# needs to be reveresed because the target node will be placed
# before the ref node
# we get the path lying between target and reference node
try:
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)
except IndexError:
ref_node, target_node = target_node, ref_node
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node)
# First we need to figure out if the two nodes to be restrained are
# each others common ancestor. This breaks on cyclic graphs
ancestor = nx.algorithms.lowest_common_ancestor(molecule.search_tree, ref_node, target_node)
# if the ancestor is equal to the target node we have to switch the
# reference and target node
paths = []
if ancestor == target_node:
ref_nodes = [target_node]
target_nodes = [ref_node]
distances = [distance]
# if ancestor is neither target nor ref_node, there is no continous path
# between the two. In this case we have to split the distance restraint
# into two parts
elif ancestor != ref_node:
print("go here")
# if target node is to be placed before ref node we need to switch them around
# otherwise the designations are fine
if molecule.search_tree.nodes[ref_node]["hierarchy"] >\
molecule.search_tree.nodes[target_node]["hierarchy"]:
ref_node, target_node = (target_node, ref_node)

# graph distances can be computed from the path positions
graph_distances_ref = {node: path.index(node) for node in path}
graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path}
ref_nodes = [ancestor, ancestor]
target_nodes = [ref_node, target_node]

for node in path:
if node == target_node:
graph_distance = 1.0
elif node == ref_node:
continue
else:
graph_distance = graph_distances_target[node]
# The first part of the split distance restraint is a new restraint
# that is the average expected distance between the ref node and the
# common ancestor
path = get_all_predecessors(molecule.search_tree,
node=ref_node,
start_node=ancestor)

upper_bound = graph_distance * avg_step_length + distance + tolerance
graph_dist_ref_target = len(path) - 1
up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target)
low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance)
partial_distance = (up_bound + low_bound) / 2.
paths.append(path)
distances = [partial_distance]
# then we put the other distance restraint that acts like the full one but
# on a partial path between ancestor and target node
path = get_all_predecessors(molecule.search_tree,
node=target_node,
start_node=ancestor)
paths.append(path)
distances.append(distance)
# if ancestor is equal to ref node all order is full-filled and we just proceed
else:
ref_nodes = [ref_node]
target_nodes = [target_node]
distances = [distance]

avg_needed_step_length = distance / graph_distances_target[ref_node]
lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance
if not paths:
paths = [get_all_predecessors(molecule.search_tree,
node=target_nodes[0],
start_node=ref_nodes[0])]

current_restraints = molecule.nodes[node].get('distance_restraints', [])
molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node,
upper_bound,
lower_bound)]
for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths):
print(ref_node, target_node, path)
_set_restraint_on_path(molecule,
path,
avg_step_length,
dist,
tolerance,
ref_node,
target_node)

def set_restraints(topology, nonbond_matrix):
"""
Expand All @@ -96,9 +173,6 @@ def set_restraints(topology, nonbond_matrix):
distance_restraints = topology.distance_restraints[(mol_name, mol_idx)]
mol = topology.molecules[mol_idx]

if set(dict(nx.degree(mol)).values()) != {1, 2}:
raise IOError("Distance restraints currently can only be applied to linear molecules.")

for ref_node, target_node in distance_restraints:
path = list(mol.search_tree.edges)
avg_step_length, _ = compute_avg_step_length(mol,
Expand All @@ -107,4 +181,9 @@ def set_restraints(topology, nonbond_matrix):
path)

distance, tolerance = distance_restraints[(ref_node, target_node)]
set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance)
set_distance_restraint(mol,
target_node,
ref_node,
distance,
avg_step_length,
tolerance)
36 changes: 36 additions & 0 deletions polyply/tests/test_restraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@
import pytest
import numpy as np
import networkx as nx
from hypothesis import given
from hypothesis_networkx import graph_builder
from hypothesis import strategies as st
import polyply
from polyply.tests.test_build_file_parser import test_molecule
from polyply.src.meta_molecule import MetaMolecule

@pytest.mark.parametrize('target_node, ref_node, distance, avg_step_length, tolerance, expected',(
# test simple revers
Expand Down Expand Up @@ -67,3 +71,35 @@ def test_set_distance_restraint(test_molecule,
assert pytest.approx(ref_restr[1], new_restr[1])
assert pytest.approx(ref_restr[2], new_restr[2])




builder = graph_builder(graph_type=nx.Graph,
node_keys=st.integers(),
min_nodes=4, max_nodes=60,
min_edges=3, max_edges=None,
self_loops=True,
connected=True)
@given(graph=builder)
def test_restraints_on_abitr_topology(graph):
test_molecule = MetaMolecule()
test_molecule.add_nodes_from(graph.nodes)
test_molecule.add_edges_from(graph.edges)

ref_node, target_node = np.random.choice(list(graph.nodes), replace=False, size=2)

# if set(dict(nx.degree(test_molecule)).values()) != {1, 2}:
# ancestor = nx.algorithms.lowest_common_ancestor(test_molecule.search_tree, ref_node, target_node)
# if ancestor != ref_node and ancestor != target_node:
# assert False

distance = 4
avg_step_length = 0.47
tolerance = 0
polyply.src.restraints.set_distance_restraint(test_molecule,
target_node,
ref_node,
distance,
avg_step_length,
tolerance)
assert len(nx.get_node_attributes(test_molecule, "distance_restraints")) != 0
1 change: 1 addition & 0 deletions requirements-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pytest-cov
pylint
codecov
tqdm
hypothesis-networkx