-
Notifications
You must be signed in to change notification settings - Fork 46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Autogenerate modification auto-mappings #604
base: master
Are you sure you want to change the base?
Changes from all commits
abd9436
37baebf
bc77b9d
1aee132
dc44f44
08fa681
e430b72
3cddb63
9e53e17
170818a
533eba1
dfadbbb
05fd326
44d9235
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
molecule. | ||
""" | ||
from collections import defaultdict | ||
from copy import deepcopy | ||
from itertools import product, combinations | ||
|
||
import networkx as nx | ||
|
@@ -274,7 +275,7 @@ | |
LOGGER.warning("Can't find modification mappings for the " | ||
"modifications {}. The following modification " | ||
"mappings are known: {}", | ||
list(group), known_mod_mappings, | ||
list(group), sorted(known_mod_mappings.keys()), | ||
type='unmapped-atom') | ||
continue | ||
needed_mod_mappings.update(covered_by) | ||
|
@@ -414,6 +415,8 @@ | |
mod_to_out = {} | ||
# Some nodes of modification will already exist. The question is | ||
# which, and which index they have in graph_out. | ||
# We'll store these anchors for now | ||
anchors = set() | ||
for mod_idx in modification: | ||
if not node_should_exist(modification, mod_idx): | ||
# Node does not exist yet. | ||
|
@@ -423,6 +426,7 @@ | |
out_idx = max(graph_out) + 1 | ||
mod_to_out[mod_idx] = out_idx | ||
graph_out.add_node(out_idx, **modification.nodes[mod_idx]) | ||
graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) | ||
else: | ||
# Node should already exist | ||
# We need to find the out_index of this node. Since the | ||
|
@@ -445,12 +449,20 @@ | |
raise ValueError("No node found in molecule with " | ||
"atomname {}".format(modification.nodes[mod_idx]['atomname'])) | ||
# Undefined loop variable is guarded against by the else-raise above | ||
anchors.add(mod_idx) | ||
mod_to_out[mod_idx] = out_idx # pylint: disable=undefined-loop-variable | ||
graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable | ||
graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', []) | ||
if modification not in graph_out.nodes[out_idx]['modifications']: | ||
graph_out.nodes[out_idx]['modifications'].append(modification) | ||
|
||
# FIXME Jank here to ensure the charge_group attributes look reasonable | ||
charge_group_start = max(graph_out.nodes[mod_to_out[idx]].get('charge_group', 1) for idx in anchors) if anchors else 1 | ||
for charge_group, mod_idx in enumerate(modification, charge_group_start): | ||
out_idx = mod_to_out[mod_idx] | ||
if 'charge_group' not in graph_out.nodes[out_idx]: | ||
graph_out.nodes[out_idx]['charge_group'] = charge_group | ||
|
||
for mol_idx in mol_to_mod: | ||
for mod_idx, weight in mol_to_mod[mol_idx].items(): | ||
out_idx = mod_to_out[mod_idx] | ||
|
@@ -541,8 +553,8 @@ | |
`molecule`. | ||
attribute_stash: tuple[str] | ||
The attributes that will always be transferred from the input molecule | ||
to the produced graph, but prefixed with _old_.Thus they are new attributes | ||
and are not conflicting with already defined attributes. | ||
to the produced graph, but prefixed with _old_. Thus, they are new | ||
attributes and are not conflicting with already defined attributes. | ||
|
||
|
||
Returns | ||
|
@@ -632,48 +644,45 @@ | |
to_remove = set() | ||
for out_idx in out_to_mol: | ||
mol_idxs = out_to_mol[out_idx].keys() | ||
# Keep track of what bead comes from where | ||
|
||
# Time to collect the attributes to set. Start by grabbing all the | ||
# attributes we already have. | ||
node_attrs = deepcopy(graph_out.nodes[out_idx]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deepcopy because of the graph attribute? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. And paranoia. |
||
|
||
# In all cases, keep track of what bead comes from where | ||
subgraph = molecule.subgraph(mol_idxs) | ||
graph_out.nodes[out_idx]['graph'] = subgraph | ||
node_attrs['graph'] = subgraph | ||
weights = out_to_mol[out_idx] | ||
graph_out.nodes[out_idx]['mapping_weights'] = weights | ||
node_attrs['mapping_weights'] = weights | ||
|
||
# Now we find attribute values from molecule. | ||
if out_idx in all_references: | ||
ref_idx = all_references[out_idx] | ||
new_attrs = attrs_from_node(molecule.nodes[ref_idx], | ||
attribute_keep+attribute_must+attribute_stash) | ||
for attr, val in new_attrs.items(): | ||
# Attrs in attribute_keep we always transfer, those in | ||
# attribute_must we transfer only if they're not already in the | ||
# created node | ||
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: | ||
graph_out.nodes[out_idx].update(new_attrs) | ||
mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_stash) | ||
for attr, val in mol_attrs.items(): | ||
# This is if/if rather than if/elif on purpose. It could be that | ||
# an attribute needs to be both stashed and kept | ||
if attr in attribute_stash: | ||
graph_out.nodes[out_idx]["_old_"+attr] = val | ||
node_attrs["_old_" + attr] = val | ||
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: | ||
node_attrs[attr] = val | ||
else: | ||
attrs = defaultdict(list) | ||
attrs_not_sane = [] | ||
for mol_idx in mol_idxs: | ||
new_attrs = attrs_from_node(molecule.nodes[mol_idx], | ||
attribute_keep+attribute_must+attribute_stash) | ||
for attr, val in new_attrs.items(): | ||
mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_stash) | ||
for attr, val in mol_attrs.items(): | ||
attrs[attr].append(val) | ||
attrs_not_sane = [] | ||
for attr, vals in attrs.items(): | ||
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: | ||
if vals: | ||
graph_out.nodes[out_idx][attr] = vals[0] | ||
else: | ||
# No nodes hat the attribute. | ||
graph_out.nodes[out_idx][attr] = None | ||
if attr in attribute_stash: | ||
if vals: | ||
graph_out.nodes[out_idx]["_old_"+attr] = vals[0] | ||
else: | ||
# No nodes hat the attribute. | ||
graph_out.nodes[out_idx]["_old_"+attr] = None | ||
|
||
for attr, vals in attrs.items(): | ||
if not are_all_equal(vals): | ||
attrs_not_sane.append(attr) | ||
# This is if/if rather than if/elif on purpose. It could be that | ||
# an attribute needs to be both stashed and kept | ||
if attr in attribute_stash: | ||
node_attrs["_old_" + attr] = vals[0] | ||
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: | ||
node_attrs[attr] = vals[0] | ||
|
||
if attrs_not_sane: | ||
LOGGER.warning('The attributes {} for atom {} are going to' | ||
|
@@ -682,6 +691,7 @@ | |
attrs_not_sane, | ||
format_atom_string(graph_out.nodes[out_idx]), | ||
type='inconsistent-data') | ||
graph_out.nodes[out_idx].update(node_attrs) | ||
if graph_out.nodes[out_idx].get('atomname', '') is None: | ||
to_remove.add(out_idx) | ||
|
||
|
@@ -757,6 +767,34 @@ | |
for idx in other_uncovered], | ||
type='unmapped-atom') | ||
|
||
# "None to one" mapping - Strictly speaking this cannot happen, but it could | ||
# be that output particles map to only particles that were not present in | ||
# the input structure. This probably causes massive issues downstream. | ||
# For now, flag a sane warning, and at the same time transfer missing "must" | ||
# attributes from nearby atoms. | ||
for node in graph_out: | ||
for attr in attribute_must: | ||
if attr not in graph_out.nodes[node]: | ||
# We can skip `node`, since if it had the attribute required we | ||
# wouldn't be in this mess to begin with. | ||
# Set a depth_limit to keep the runtime within bounds. | ||
close_nodes = (v for (u, v) in nx.bfs_edges(graph_out, source=node, depth_limit=3)) | ||
for template_node in close_nodes: | ||
if attr in graph_out.nodes[template_node]: | ||
graph_out.nodes[node][attr] = deepcopy(graph_out.nodes[template_node][attr]) | ||
LOGGER.warning("Atom {} did not have a {}, so we took " | ||
"it from atom {} instead.", | ||
format_atom_string(graph_out.nodes[node]), | ||
attr, | ||
format_atom_string(graph_out.nodes[template_node]), | ||
type="inconsistent-data") | ||
break | ||
Comment on lines
+775
to
+791
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't get how this can be the case. Is that for hydrogen modifications? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This happens whenever a output particle is made up of only nodes that were reconstructed by repair graph. So unless there's entire sidechains missing you generally only see this with atomistic models, which is why this remained hidden for so long. |
||
else: # no break, attribute not found | ||
LOGGER.warning("Atom {} does not have a {}, and we couldn't " | ||
"find a nearby atom that does.", | ||
format_atom_string(graph_out.nodes[node]), | ||
attr, type="inconsistent-data") | ||
|
||
for interaction_type in modified_interactions: | ||
for atoms, modifications in modified_interactions[interaction_type].items(): | ||
if len(modifications) != 1: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is 1 a good default value? Shouldn't all residues at this stage have a charge group?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should? Yes. But I've been bitten by that word too often. Note that the number picked here does get incremented by the number of nodes before this in the modification. Admittedly it's not great, but it's ok.