-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix/157 #161
base: master
Are you sure you want to change the base?
Fix/157 #161
Changes from all commits
c753dc4
ce3c23e
7d1522c
9c011f1
9a73050
b74206e
06b1db3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -181,5 +181,49 @@ def get_all_predecessors(graph, node, start_node=0): | |
predecessors.append(pre_node) | ||
if pre_node == start_node: | ||
break | ||
|
||
predecessors.reverse() | ||
return predecessors | ||
|
||
def _find_longest_path(graph, source, target): | ||
all_paths = list(nx.all_simple_edge_paths(graph, source, target=target)) | ||
max_path_length = 0 | ||
idx = 0 | ||
for jdx, path in enumerate(all_paths): | ||
if len(path) > max_path_length: | ||
max_path_length = len(path) | ||
idx = jdx | ||
return all_paths[idx] | ||
|
||
def polyply_custom_tree(graph, source, target): | ||
# find all simple paths from source to target | ||
longest_path = _find_longest_path(graph, source, target) | ||
path_nodes = { u for edge in longest_path for u in edge } | ||
# substract path and extract the remaining graphs of the branches | ||
graph_copy = graph.copy() | ||
graph_copy.remove_edges_from(longest_path) | ||
components = list(nx.connected_components(graph_copy)) | ||
# loop over the edges in the longest path and | ||
# see if they are part of a connected component | ||
# then add the bfs tree edges of that component to the | ||
# graph before the edge | ||
tree_graph = nx.DiGraph() | ||
for from_node, to_node in longest_path: | ||
for c in components: | ||
if from_node in c: | ||
break | ||
|
||
subgraph = graph_copy.subgraph(c) | ||
for u, v in nx.bfs_tree(subgraph, source=from_node).edges: | ||
if u not in path_nodes or v not in path_nodes: | ||
tree_graph.add_edge(u, v) | ||
tree_graph.add_edge(from_node, to_node) | ||
|
||
return tree_graph | ||
Comment on lines
+198
to
+222
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't hate me, but did you see https://networkx.org/documentation/stable/reference/algorithms/tree.html#module-networkx.algorithms.tree.branchings and https://networkx.org/documentation/stable/reference/algorithms/tree.html#module-networkx.algorithms.tree.mst? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes but that is intended. The polyply search tree should be a directed graph without cycles. The tree should have been a separate PR but since it made it in. Basically the idea is to get rid of the dfs vs bfs thing for cycles. To do that I need to make a tree which essentially is a DFS tree along the backbone (i.e. longest path) but the branches are placed bfs like with cycles being replaced by distance restraints. |
||
|
||
def annotate_hierarchy(graph): | ||
hierarchy= {} | ||
for idx, ndx in enumerate(graph.nodes()): | ||
hierarchy[ndx] = idx | ||
nx.set_node_attributes(graph, hierarchy, "hierarchy") | ||
return graph |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,51 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import networkx as nx | ||
from .graph_utils import compute_avg_step_length, get_all_predecessors | ||
from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors | ||
|
||
def upper_bound(avg_step_length, distance, graph_dist, tolerance=0): | ||
bound = graph_dist * avg_step_length + distance + tolerance | ||
return bound | ||
|
||
def lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance=0): | ||
bound = avg_needed_step_length * graph_dist_ref - tolerance | ||
return bound | ||
|
||
def _set_restraint_on_path(graph, | ||
path, | ||
avg_step_length, | ||
distance, | ||
tolerance, | ||
ref_node=None, | ||
target_node=None): | ||
|
||
# graph distances can be computed from the path positions | ||
graph_distances_ref = {node: path.index(node) for node in path} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Expensive! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but its cheaper than computing the disjstra path length no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. Consider however, the following: |
||
graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} | ||
|
||
if not target_node: | ||
target_node = path[-1] | ||
|
||
if not ref_node: | ||
ref_node = path[0] | ||
|
||
avg_needed_step_length = distance / graph_distances_target[ref_node] | ||
|
||
for node in path[1:]: | ||
if node == target_node: | ||
graph_dist_ref = 1.0 | ||
else: | ||
graph_dist_ref = graph_distances_target[node] | ||
|
||
graph_dist_target = graph_distances_target[node] | ||
up_bound = upper_bound(avg_step_length, distance, graph_dist_ref, tolerance) | ||
low_bound = lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance) | ||
|
||
current_restraints = graph.nodes[node].get('distance_restraints', []) | ||
graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, | ||
up_bound, | ||
low_bound)] | ||
|
||
|
||
def set_distance_restraint(molecule, | ||
target_node, | ||
|
@@ -50,37 +94,70 @@ def set_distance_restraint(molecule, | |
tolerance: float | ||
absolute tolerance (nm) | ||
""" | ||
# if the target node is a predecssor to the ref node the order | ||
# needs to be reveresed because the target node will be placed | ||
# before the ref node | ||
# we get the path lying between target and reference node | ||
try: | ||
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) | ||
except IndexError: | ||
ref_node, target_node = target_node, ref_node | ||
path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) | ||
# First we need to figure out if the two nodes to be restrained are | ||
# each others common ancestor. This breaks on cyclic graphs | ||
ancestor = nx.algorithms.lowest_common_ancestor(molecule.search_tree, ref_node, target_node) | ||
# if the ancestor is equal to the target node we have to switch the | ||
# reference and target node | ||
paths = [] | ||
if ancestor == target_node: | ||
ref_nodes = [target_node] | ||
target_nodes = [ref_node] | ||
distances = [distance] | ||
# if ancestor is neither target nor ref_node, there is no continous path | ||
# between the two. In this case we have to split the distance restraint | ||
# into two parts | ||
elif ancestor != ref_node: | ||
print("go here") | ||
# if target node is to be placed before ref node we need to switch them around | ||
# otherwise the designations are fine | ||
if molecule.search_tree.nodes[ref_node]["hierarchy"] >\ | ||
molecule.search_tree.nodes[target_node]["hierarchy"]: | ||
ref_node, target_node = (target_node, ref_node) | ||
|
||
# graph distances can be computed from the path positions | ||
graph_distances_ref = {node: path.index(node) for node in path} | ||
graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} | ||
ref_nodes = [ancestor, ancestor] | ||
target_nodes = [ref_node, target_node] | ||
|
||
for node in path: | ||
if node == target_node: | ||
graph_distance = 1.0 | ||
elif node == ref_node: | ||
continue | ||
else: | ||
graph_distance = graph_distances_target[node] | ||
# The first part of the split distance restraint is a new restraint | ||
# that is the average expected distance between the ref node and the | ||
# common ancestor | ||
path = get_all_predecessors(molecule.search_tree, | ||
node=ref_node, | ||
start_node=ancestor) | ||
|
||
upper_bound = graph_distance * avg_step_length + distance + tolerance | ||
graph_dist_ref_target = len(path) - 1 | ||
up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target) | ||
low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance) | ||
partial_distance = (up_bound + low_bound) / 2. | ||
paths.append(path) | ||
distances = [partial_distance] | ||
# then we put the other distance restraint that acts like the full one but | ||
# on a partial path between ancestor and target node | ||
path = get_all_predecessors(molecule.search_tree, | ||
node=target_node, | ||
start_node=ancestor) | ||
paths.append(path) | ||
distances.append(distance) | ||
# if ancestor is equal to ref node all order is full-filled and we just proceed | ||
else: | ||
ref_nodes = [ref_node] | ||
target_nodes = [target_node] | ||
distances = [distance] | ||
|
||
avg_needed_step_length = distance / graph_distances_target[ref_node] | ||
lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance | ||
if not paths: | ||
paths = [get_all_predecessors(molecule.search_tree, | ||
node=target_nodes[0], | ||
start_node=ref_nodes[0])] | ||
|
||
current_restraints = molecule.nodes[node].get('distance_restraints', []) | ||
molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, | ||
upper_bound, | ||
lower_bound)] | ||
for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths): | ||
print(ref_node, target_node, path) | ||
_set_restraint_on_path(molecule, | ||
path, | ||
avg_step_length, | ||
dist, | ||
tolerance, | ||
ref_node, | ||
target_node) | ||
|
||
def set_restraints(topology, nonbond_matrix): | ||
""" | ||
|
@@ -96,9 +173,6 @@ def set_restraints(topology, nonbond_matrix): | |
distance_restraints = topology.distance_restraints[(mol_name, mol_idx)] | ||
mol = topology.molecules[mol_idx] | ||
|
||
if set(dict(nx.degree(mol)).values()) != {1, 2}: | ||
raise IOError("Distance restraints currently can only be applied to linear molecules.") | ||
|
||
for ref_node, target_node in distance_restraints: | ||
path = list(mol.search_tree.edges) | ||
avg_step_length, _ = compute_avg_step_length(mol, | ||
|
@@ -107,4 +181,9 @@ def set_restraints(topology, nonbond_matrix): | |
path) | ||
|
||
distance, tolerance = distance_restraints[(ref_node, target_node)] | ||
set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance) | ||
set_distance_restraint(mol, | ||
target_node, | ||
ref_node, | ||
distance, | ||
avg_step_length, | ||
tolerance) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ pytest-cov | |
pylint | ||
codecov | ||
tqdm | ||
hypothesis-networkx |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless you want to deal with multiple longest paths
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to deal with it in the sense that there can be more than one with equivalent path length. But the smaller code should account for that I guess?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. Then you should get the one with the lowest index, but don't pin me on that.