Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autogenerate modification auto-mappings #604

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
mappings=mappings,
to_ff=to_ff,
delete_unknown=delete_unknown,
attribute_keep=("cgsecstruct", "chain"),
attribute_must=("resname",),
attribute_keep=("cgsecstruct", "resname", "chain"),
attribute_must=("resname", "atype"),
attribute_stash=("resid",),
).run_system(system)
LOGGER.info("Averaging the coordinates.", type="step")
Expand Down
14 changes: 12 additions & 2 deletions vermouth/gmx/itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

import copy
import itertools
from collections import defaultdict
from ..log_helpers import StyleAdapter, get_logger

LOGGER = StyleAdapter(get_logger(__name__))

__all__ = ['write_molecule_itp', ]

Expand Down Expand Up @@ -111,10 +115,16 @@
# Make sure the molecule contains the information required to write the
# [atoms] section. The charge and mass can be ommited, if so gromacs take
# them from the [atomtypes] section of the ITP file.
atoms_with_issues = defaultdict(list)
for attribute in ('atype', 'resid', 'resname', 'atomname',
'charge_group'):
if not all([attribute in atom for _, atom in molecule.atoms]):
raise ValueError('Not all atom have a {}.'.format(attribute))
for _, atom in molecule.atoms:
if attribute not in atom:
atoms_with_issues[attribute].append(atom)

Check warning on line 123 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L123

Added line #L123 was not covered by tests
if atoms_with_issues:
for attr, atoms in atoms_with_issues.items():
LOGGER.error('The following atoms do not have a {}: {}', attr, atoms)
raise ValueError("Some atoms are missing required attributes.")

Check warning on line 127 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L126-L127

Added lines #L126 - L127 were not covered by tests

# Get the maximum length of each atom field so we can align the fields.
# Atom indexes are written as a consecutive series starting from 1.
Expand Down
27 changes: 15 additions & 12 deletions vermouth/map_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,25 +441,23 @@
return dict(mappings)


def generate_self_mappings(blocks):
def generate_self_mappings(force_field):
"""
Generate self mappings from a collection of blocks.
Generate self mappings from a modification for the blocks and modifications.

A self mapping is a mapping that maps a force field to itself. Applying
such mapping is applying a neutral transformation.

Parameters
----------
blocks: dict[str, networkx.Graph]
A dictionary of blocks with block names as keys and the blocks
themselves as values. The blocks must be instances of :class:`networkx.Graph`
with each node having an 'atomname' attribute.
force_field: vermouth.forcefield.ForceField
A force field containing Blocks and Modifications

Returns
-------
mappings: dict[str, tuple]
A dictionary of mappings where the keys are the names of the blocks,
and the values are tuples like (mapping, weights, extra).
mappings: dict[~vermouth.map_parser.Mapping]
A dictionary of mappings where the keys are the names of the
blocks/modifications, and the values are Mappings.

Raises
------
Expand All @@ -474,11 +472,16 @@
Generate self mappings for a list of force fields.
"""
mappings = {}
for name, block in blocks.items():
for name, block in force_field.blocks.items():
mapping = Mapping(block, block, {idx: {idx: 1} for idx in block.nodes},
{}, ff_from=block.force_field, ff_to=block.force_field,
{}, ff_from=force_field, ff_to=force_field,
extra=[], type='block', names=(name,))
mappings[name] = mapping
for name, mod in force_field.modifications.items():
mapping = Mapping(mod, mod, {idx: {idx: 1} for idx in mod.nodes},
{}, ff_from=force_field, ff_to=force_field,
extra=[], type='modification', names=(name,))
mappings[name] = mapping

Check warning on line 484 in vermouth/map_input.py

View check run for this annotation

Codecov / codecov/patch

vermouth/map_input.py#L484

Added line #L484 was not covered by tests
return mappings


Expand All @@ -500,7 +503,7 @@
mappings = collections.defaultdict(dict)
for force_field in force_fields:
name = force_field.name
mappings[name][name] = generate_self_mappings(force_field.blocks)
mappings[name][name] = generate_self_mappings(force_field)
return dict(mappings)


Expand Down
102 changes: 70 additions & 32 deletions vermouth/processors/do_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
molecule.
"""
from collections import defaultdict
from copy import deepcopy
from itertools import product, combinations

import networkx as nx
Expand Down Expand Up @@ -274,7 +275,7 @@
LOGGER.warning("Can't find modification mappings for the "
"modifications {}. The following modification "
"mappings are known: {}",
list(group), known_mod_mappings,
list(group), sorted(known_mod_mappings.keys()),
type='unmapped-atom')
continue
needed_mod_mappings.update(covered_by)
Expand Down Expand Up @@ -414,6 +415,8 @@
mod_to_out = {}
# Some nodes of modification will already exist. The question is
# which, and which index they have in graph_out.
# We'll store these anchors for now
anchors = set()
for mod_idx in modification:
if not node_should_exist(modification, mod_idx):
# Node does not exist yet.
Expand All @@ -423,6 +426,7 @@
out_idx = max(graph_out) + 1
mod_to_out[mod_idx] = out_idx
graph_out.add_node(out_idx, **modification.nodes[mod_idx])
graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {}))
else:
# Node should already exist
# We need to find the out_index of this node. Since the
Expand All @@ -445,12 +449,20 @@
raise ValueError("No node found in molecule with "
"atomname {}".format(modification.nodes[mod_idx]['atomname']))
# Undefined loop variable is guarded against by the else-raise above
anchors.add(mod_idx)
mod_to_out[mod_idx] = out_idx # pylint: disable=undefined-loop-variable
graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable
graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', [])
if modification not in graph_out.nodes[out_idx]['modifications']:
graph_out.nodes[out_idx]['modifications'].append(modification)

# FIXME Jank here to ensure the charge_group attributes look reasonable
charge_group_start = max(graph_out.nodes[mod_to_out[idx]].get('charge_group', 1) for idx in anchors) if anchors else 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is 1 a good default value? Shouldn't all residues at this stage have a charge group?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should? Yes. But I've been bitten by that word too often. Note that the number picked here does get incremented by the number of nodes before this in the modification. Admittedly it's not great, but it's ok.

for charge_group, mod_idx in enumerate(modification, charge_group_start):
out_idx = mod_to_out[mod_idx]
if 'charge_group' not in graph_out.nodes[out_idx]:
graph_out.nodes[out_idx]['charge_group'] = charge_group

for mol_idx in mol_to_mod:
for mod_idx, weight in mol_to_mod[mol_idx].items():
out_idx = mod_to_out[mod_idx]
Expand Down Expand Up @@ -541,8 +553,8 @@
`molecule`.
attribute_stash: tuple[str]
The attributes that will always be transferred from the input molecule
to the produced graph, but prefixed with _old_.Thus they are new attributes
and are not conflicting with already defined attributes.
to the produced graph, but prefixed with _old_. Thus, they are new
attributes and are not conflicting with already defined attributes.


Returns
Expand Down Expand Up @@ -632,48 +644,45 @@
to_remove = set()
for out_idx in out_to_mol:
mol_idxs = out_to_mol[out_idx].keys()
# Keep track of what bead comes from where

# Time to collect the attributes to set. Start by grabbing all the
# attributes we already have.
node_attrs = deepcopy(graph_out.nodes[out_idx])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deepcopy because of the graph attribute?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. And paranoia.


# In all cases, keep track of what bead comes from where
subgraph = molecule.subgraph(mol_idxs)
graph_out.nodes[out_idx]['graph'] = subgraph
node_attrs['graph'] = subgraph
weights = out_to_mol[out_idx]
graph_out.nodes[out_idx]['mapping_weights'] = weights
node_attrs['mapping_weights'] = weights

# Now we find attribute values from molecule.
if out_idx in all_references:
ref_idx = all_references[out_idx]
new_attrs = attrs_from_node(molecule.nodes[ref_idx],
attribute_keep+attribute_must+attribute_stash)
for attr, val in new_attrs.items():
# Attrs in attribute_keep we always transfer, those in
# attribute_must we transfer only if they're not already in the
# created node
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
graph_out.nodes[out_idx].update(new_attrs)
mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_stash)
for attr, val in mol_attrs.items():
# This is if/if rather than if/elif on purpose. It could be that
# an attribute needs to be both stashed and kept
if attr in attribute_stash:
graph_out.nodes[out_idx]["_old_"+attr] = val
node_attrs["_old_" + attr] = val

Check warning on line 666 in vermouth/processors/do_mapping.py

View check run for this annotation

Codecov / codecov/patch

vermouth/processors/do_mapping.py#L666

Added line #L666 was not covered by tests
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
node_attrs[attr] = val
else:
attrs = defaultdict(list)
attrs_not_sane = []
for mol_idx in mol_idxs:
new_attrs = attrs_from_node(molecule.nodes[mol_idx],
attribute_keep+attribute_must+attribute_stash)
for attr, val in new_attrs.items():
mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_stash)
for attr, val in mol_attrs.items():
attrs[attr].append(val)
attrs_not_sane = []
for attr, vals in attrs.items():
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
if vals:
graph_out.nodes[out_idx][attr] = vals[0]
else:
# No nodes hat the attribute.
graph_out.nodes[out_idx][attr] = None
if attr in attribute_stash:
if vals:
graph_out.nodes[out_idx]["_old_"+attr] = vals[0]
else:
# No nodes hat the attribute.
graph_out.nodes[out_idx]["_old_"+attr] = None

for attr, vals in attrs.items():
if not are_all_equal(vals):
attrs_not_sane.append(attr)
# This is if/if rather than if/elif on purpose. It could be that
# an attribute needs to be both stashed and kept
if attr in attribute_stash:
node_attrs["_old_" + attr] = vals[0]
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
node_attrs[attr] = vals[0]

if attrs_not_sane:
LOGGER.warning('The attributes {} for atom {} are going to'
Expand All @@ -682,6 +691,7 @@
attrs_not_sane,
format_atom_string(graph_out.nodes[out_idx]),
type='inconsistent-data')
graph_out.nodes[out_idx].update(node_attrs)
if graph_out.nodes[out_idx].get('atomname', '') is None:
to_remove.add(out_idx)

Expand Down Expand Up @@ -757,6 +767,34 @@
for idx in other_uncovered],
type='unmapped-atom')

# "None to one" mapping - Strictly speaking this cannot happen, but it could
# be that output particles map to only particles that were not present in
# the input structure. This probably causes massive issues downstream.
# For now, flag a sane warning, and at the same time transfer missing "must"
# attributes from nearby atoms.
for node in graph_out:
for attr in attribute_must:
if attr not in graph_out.nodes[node]:
# We can skip `node`, since if it had the attribute required we
# wouldn't be in this mess to begin with.
# Set a depth_limit to keep the runtime within bounds.
close_nodes = (v for (u, v) in nx.bfs_edges(graph_out, source=node, depth_limit=3))
for template_node in close_nodes:
if attr in graph_out.nodes[template_node]:
graph_out.nodes[node][attr] = deepcopy(graph_out.nodes[template_node][attr])
LOGGER.warning("Atom {} did not have a {}, so we took "
"it from atom {} instead.",
format_atom_string(graph_out.nodes[node]),
attr,
format_atom_string(graph_out.nodes[template_node]),
type="inconsistent-data")
break
Comment on lines +775 to +791
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get how this can be the case. Is that for hydrogen modifications?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This happens whenever a output particle is made up of only nodes that were reconstructed by repair graph. So unless there's entire sidechains missing you generally only see this with atomistic models, which is why this remained hidden for so long.

else: # no break, attribute not found
LOGGER.warning("Atom {} does not have a {}, and we couldn't "

Check warning on line 793 in vermouth/processors/do_mapping.py

View check run for this annotation

Codecov / codecov/patch

vermouth/processors/do_mapping.py#L793

Added line #L793 was not covered by tests
"find a nearby atom that does.",
format_atom_string(graph_out.nodes[node]),
attr, type="inconsistent-data")

for interaction_type in modified_interactions:
for atoms, modifications in modified_interactions[interaction_type].items():
if len(modifications) != 1:
Expand Down
4 changes: 2 additions & 2 deletions vermouth/tests/test_do_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def test_apply_mod_mapping(modified_molecule, modifications):
"""
graph_out = Molecule(force_field=FF_UNIVERSAL)
graph_out.add_nodes_from([
(0, {'atomname': 'A', 'resid': 1})
(0, {'atomname': 'A', 'resid': 1, 'charge_group': 1})
])
mol_to_out = {0: {0: 1}}
out_to_mol = {0: {0: 1}}
Expand All @@ -492,7 +492,7 @@ def test_apply_mod_mapping(modified_molecule, modifications):
assert mol_to_out == {0: {0: 1}, 1: {1: 1}}
assert out_to_mol == {0: {0: 1}, 1: {1: 1}}

graph_out.add_node(2, atomname='J', resid=2)
graph_out.add_node(2, atomname='J', resid=2, charge_group=2)
mol_to_out[16] = {2: 1}
out_to_mol[2] = {16: 1}

Expand Down
12 changes: 9 additions & 3 deletions vermouth/tests/test_map_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,15 +522,18 @@ def test_generate_self_mapping():
Test that :func:`vermouth.map_input.generate_self_mappings` works as
expected.
"""
force_field = vermouth.forcefield.ForceField(name='ff')
# Build the input blocks
blocks = {
'A0': vermouth.molecule.Block([['AA', 'BBB'], ['BBB', 'CCCC']]),
'B1': vermouth.molecule.Block([['BBB', 'CCCC'], ['BBB', 'E']]),
}
force_field.blocks = blocks
for name, block in blocks.items():
block.name = name
for atomname, node in block.nodes.items():
node['atomname'] = atomname

# Build the expected output
ref_mappings = {
'A0': (
Expand All @@ -547,10 +550,13 @@ def test_generate_self_mapping():
),
}
# Actually test
mappings = vermouth.map_input.generate_self_mappings(blocks)
mappings = vermouth.map_input.generate_self_mappings(force_field)
for name, map in mappings.items():
ref = ref_mappings[name]
assert map.mapping == ref[0]
assert map.block_to.extra == ref[1]

assert mappings.keys() == ref_mappings.keys()
for blockname in mappings:
assert mappings[blockname].mapping, mappings[blockname].extras == ref_mappings[blockname]


def test_generate_all_self_mappings():
Expand Down