diff --git a/bin/martinize2 b/bin/martinize2 index 38612bb5..d527c4ac 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -179,8 +179,8 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde mappings=mappings, to_ff=to_ff, delete_unknown=delete_unknown, - attribute_keep=("cgsecstruct", "chain"), - attribute_must=("resname",), + attribute_keep=("cgsecstruct", "resname", "chain"), + attribute_must=("resname", "atype"), attribute_stash=("resid",), ).run_system(system) LOGGER.info("Averaging the coordinates.", type="step") diff --git a/vermouth/gmx/itp.py b/vermouth/gmx/itp.py index 821232c3..7f964089 100644 --- a/vermouth/gmx/itp.py +++ b/vermouth/gmx/itp.py @@ -18,6 +18,10 @@ import copy import itertools +from collections import defaultdict +from ..log_helpers import StyleAdapter, get_logger + +LOGGER = StyleAdapter(get_logger(__name__)) __all__ = ['write_molecule_itp', ] @@ -111,10 +115,16 @@ def write_molecule_itp(molecule, outfile, header=(), moltype=None, # Make sure the molecule contains the information required to write the # [atoms] section. The charge and mass can be ommited, if so gromacs take # them from the [atomtypes] section of the ITP file. + atoms_with_issues = defaultdict(list) for attribute in ('atype', 'resid', 'resname', 'atomname', 'charge_group'): - if not all([attribute in atom for _, atom in molecule.atoms]): - raise ValueError('Not all atom have a {}.'.format(attribute)) + for _, atom in molecule.atoms: + if attribute not in atom: + atoms_with_issues[attribute].append(atom) + if atoms_with_issues: + for attr, atoms in atoms_with_issues.items(): + LOGGER.error('The following atoms do not have a {}: {}', attr, atoms) + raise ValueError("Some atoms are missing required attributes.") # Get the maximum length of each atom field so we can align the fields. # Atom indexes are written as a consecutive series starting from 1. diff --git a/vermouth/map_input.py b/vermouth/map_input.py index d87f66d6..c5e41eaf 100644 --- a/vermouth/map_input.py +++ b/vermouth/map_input.py @@ -441,25 +441,23 @@ def read_mapping_directory(directory, force_fields): return dict(mappings) -def generate_self_mappings(blocks): +def generate_self_mappings(force_field): """ - Generate self mappings from a collection of blocks. + Generate self mappings from a modification for the blocks and modifications. A self mapping is a mapping that maps a force field to itself. Applying such mapping is applying a neutral transformation. Parameters ---------- - blocks: dict[str, networkx.Graph] - A dictionary of blocks with block names as keys and the blocks - themselves as values. The blocks must be instances of :class:`networkx.Graph` - with each node having an 'atomname' attribute. + force_field: vermouth.forcefield.ForceField + A force field containing Blocks and Modifications Returns ------- - mappings: dict[str, tuple] - A dictionary of mappings where the keys are the names of the blocks, - and the values are tuples like (mapping, weights, extra). + mappings: dict[~vermouth.map_parser.Mapping] + A dictionary of mappings where the keys are the names of the + blocks/modifications, and the values are Mappings. Raises ------ @@ -474,11 +472,16 @@ def generate_self_mappings(blocks): Generate self mappings for a list of force fields. """ mappings = {} - for name, block in blocks.items(): + for name, block in force_field.blocks.items(): mapping = Mapping(block, block, {idx: {idx: 1} for idx in block.nodes}, - {}, ff_from=block.force_field, ff_to=block.force_field, + {}, ff_from=force_field, ff_to=force_field, extra=[], type='block', names=(name,)) mappings[name] = mapping + for name, mod in force_field.modifications.items(): + mapping = Mapping(mod, mod, {idx: {idx: 1} for idx in mod.nodes}, + {}, ff_from=force_field, ff_to=force_field, + extra=[], type='modification', names=(name,)) + mappings[name] = mapping return mappings @@ -500,7 +503,7 @@ def generate_all_self_mappings(force_fields): mappings = collections.defaultdict(dict) for force_field in force_fields: name = force_field.name - mappings[name][name] = generate_self_mappings(force_field.blocks) + mappings[name][name] = generate_self_mappings(force_field) return dict(mappings) diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py index 38b25fd1..bf30479c 100644 --- a/vermouth/processors/do_mapping.py +++ b/vermouth/processors/do_mapping.py @@ -18,6 +18,7 @@ molecule. """ from collections import defaultdict +from copy import deepcopy from itertools import product, combinations import networkx as nx @@ -274,7 +275,7 @@ def modification_matches(molecule, mappings): LOGGER.warning("Can't find modification mappings for the " "modifications {}. The following modification " "mappings are known: {}", - list(group), known_mod_mappings, + list(group), sorted(known_mod_mappings.keys()), type='unmapped-atom') continue needed_mod_mappings.update(covered_by) @@ -414,6 +415,8 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): mod_to_out = {} # Some nodes of modification will already exist. The question is # which, and which index they have in graph_out. + # We'll store these anchors for now + anchors = set() for mod_idx in modification: if not node_should_exist(modification, mod_idx): # Node does not exist yet. @@ -423,6 +426,7 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): out_idx = max(graph_out) + 1 mod_to_out[mod_idx] = out_idx graph_out.add_node(out_idx, **modification.nodes[mod_idx]) + graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) else: # Node should already exist # We need to find the out_index of this node. Since the @@ -445,12 +449,20 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): raise ValueError("No node found in molecule with " "atomname {}".format(modification.nodes[mod_idx]['atomname'])) # Undefined loop variable is guarded against by the else-raise above + anchors.add(mod_idx) mod_to_out[mod_idx] = out_idx # pylint: disable=undefined-loop-variable graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', []) if modification not in graph_out.nodes[out_idx]['modifications']: graph_out.nodes[out_idx]['modifications'].append(modification) + # FIXME Jank here to ensure the charge_group attributes look reasonable + charge_group_start = max(graph_out.nodes[mod_to_out[idx]].get('charge_group', 1) for idx in anchors) if anchors else 1 + for charge_group, mod_idx in enumerate(modification, charge_group_start): + out_idx = mod_to_out[mod_idx] + if 'charge_group' not in graph_out.nodes[out_idx]: + graph_out.nodes[out_idx]['charge_group'] = charge_group + for mol_idx in mol_to_mod: for mod_idx, weight in mol_to_mod[mol_idx].items(): out_idx = mod_to_out[mod_idx] @@ -541,8 +553,8 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), `molecule`. attribute_stash: tuple[str] The attributes that will always be transferred from the input molecule - to the produced graph, but prefixed with _old_.Thus they are new attributes - and are not conflicting with already defined attributes. + to the produced graph, but prefixed with _old_. Thus, they are new + attributes and are not conflicting with already defined attributes. Returns @@ -632,48 +644,45 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), to_remove = set() for out_idx in out_to_mol: mol_idxs = out_to_mol[out_idx].keys() - # Keep track of what bead comes from where + + # Time to collect the attributes to set. Start by grabbing all the + # attributes we already have. + node_attrs = deepcopy(graph_out.nodes[out_idx]) + + # In all cases, keep track of what bead comes from where subgraph = molecule.subgraph(mol_idxs) - graph_out.nodes[out_idx]['graph'] = subgraph + node_attrs['graph'] = subgraph weights = out_to_mol[out_idx] - graph_out.nodes[out_idx]['mapping_weights'] = weights + node_attrs['mapping_weights'] = weights + # Now we find attribute values from molecule. if out_idx in all_references: ref_idx = all_references[out_idx] - new_attrs = attrs_from_node(molecule.nodes[ref_idx], - attribute_keep+attribute_must+attribute_stash) - for attr, val in new_attrs.items(): - # Attrs in attribute_keep we always transfer, those in - # attribute_must we transfer only if they're not already in the - # created node - if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: - graph_out.nodes[out_idx].update(new_attrs) + mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_stash) + for attr, val in mol_attrs.items(): + # This is if/if rather than if/elif on purpose. It could be that + # an attribute needs to be both stashed and kept if attr in attribute_stash: - graph_out.nodes[out_idx]["_old_"+attr] = val + node_attrs["_old_" + attr] = val + if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: + node_attrs[attr] = val else: attrs = defaultdict(list) + attrs_not_sane = [] for mol_idx in mol_idxs: - new_attrs = attrs_from_node(molecule.nodes[mol_idx], - attribute_keep+attribute_must+attribute_stash) - for attr, val in new_attrs.items(): + mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_stash) + for attr, val in mol_attrs.items(): attrs[attr].append(val) - attrs_not_sane = [] - for attr, vals in attrs.items(): - if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: - if vals: - graph_out.nodes[out_idx][attr] = vals[0] - else: - # No nodes hat the attribute. - graph_out.nodes[out_idx][attr] = None - if attr in attribute_stash: - if vals: - graph_out.nodes[out_idx]["_old_"+attr] = vals[0] - else: - # No nodes hat the attribute. - graph_out.nodes[out_idx]["_old_"+attr] = None + for attr, vals in attrs.items(): if not are_all_equal(vals): attrs_not_sane.append(attr) + # This is if/if rather than if/elif on purpose. It could be that + # an attribute needs to be both stashed and kept + if attr in attribute_stash: + node_attrs["_old_" + attr] = vals[0] + if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: + node_attrs[attr] = vals[0] if attrs_not_sane: LOGGER.warning('The attributes {} for atom {} are going to' @@ -682,6 +691,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), attrs_not_sane, format_atom_string(graph_out.nodes[out_idx]), type='inconsistent-data') + graph_out.nodes[out_idx].update(node_attrs) if graph_out.nodes[out_idx].get('atomname', '') is None: to_remove.add(out_idx) @@ -757,6 +767,34 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), for idx in other_uncovered], type='unmapped-atom') + # "None to one" mapping - Strictly speaking this cannot happen, but it could + # be that output particles map to only particles that were not present in + # the input structure. This probably causes massive issues downstream. + # For now, flag a sane warning, and at the same time transfer missing "must" + # attributes from nearby atoms. + for node in graph_out: + for attr in attribute_must: + if attr not in graph_out.nodes[node]: + # We can skip `node`, since if it had the attribute required we + # wouldn't be in this mess to begin with. + # Set a depth_limit to keep the runtime within bounds. + close_nodes = (v for (u, v) in nx.bfs_edges(graph_out, source=node, depth_limit=3)) + for template_node in close_nodes: + if attr in graph_out.nodes[template_node]: + graph_out.nodes[node][attr] = deepcopy(graph_out.nodes[template_node][attr]) + LOGGER.warning("Atom {} did not have a {}, so we took " + "it from atom {} instead.", + format_atom_string(graph_out.nodes[node]), + attr, + format_atom_string(graph_out.nodes[template_node]), + type="inconsistent-data") + break + else: # no break, attribute not found + LOGGER.warning("Atom {} does not have a {}, and we couldn't " + "find a nearby atom that does.", + format_atom_string(graph_out.nodes[node]), + attr, type="inconsistent-data") + for interaction_type in modified_interactions: for atoms, modifications in modified_interactions[interaction_type].items(): if len(modifications) != 1: diff --git a/vermouth/tests/test_do_mapping.py b/vermouth/tests/test_do_mapping.py index 3916af84..b9dcc084 100644 --- a/vermouth/tests/test_do_mapping.py +++ b/vermouth/tests/test_do_mapping.py @@ -475,7 +475,7 @@ def test_apply_mod_mapping(modified_molecule, modifications): """ graph_out = Molecule(force_field=FF_UNIVERSAL) graph_out.add_nodes_from([ - (0, {'atomname': 'A', 'resid': 1}) + (0, {'atomname': 'A', 'resid': 1, 'charge_group': 1}) ]) mol_to_out = {0: {0: 1}} out_to_mol = {0: {0: 1}} @@ -492,7 +492,7 @@ def test_apply_mod_mapping(modified_molecule, modifications): assert mol_to_out == {0: {0: 1}, 1: {1: 1}} assert out_to_mol == {0: {0: 1}, 1: {1: 1}} - graph_out.add_node(2, atomname='J', resid=2) + graph_out.add_node(2, atomname='J', resid=2, charge_group=2) mol_to_out[16] = {2: 1} out_to_mol[2] = {16: 1} diff --git a/vermouth/tests/test_map_input.py b/vermouth/tests/test_map_input.py index c2184745..5fe50fab 100644 --- a/vermouth/tests/test_map_input.py +++ b/vermouth/tests/test_map_input.py @@ -522,15 +522,18 @@ def test_generate_self_mapping(): Test that :func:`vermouth.map_input.generate_self_mappings` works as expected. """ + force_field = vermouth.forcefield.ForceField(name='ff') # Build the input blocks blocks = { 'A0': vermouth.molecule.Block([['AA', 'BBB'], ['BBB', 'CCCC']]), 'B1': vermouth.molecule.Block([['BBB', 'CCCC'], ['BBB', 'E']]), } + force_field.blocks = blocks for name, block in blocks.items(): block.name = name for atomname, node in block.nodes.items(): node['atomname'] = atomname + # Build the expected output ref_mappings = { 'A0': ( @@ -547,10 +550,13 @@ def test_generate_self_mapping(): ), } # Actually test - mappings = vermouth.map_input.generate_self_mappings(blocks) + mappings = vermouth.map_input.generate_self_mappings(force_field) + for name, map in mappings.items(): + ref = ref_mappings[name] + assert map.mapping == ref[0] + assert map.block_to.extra == ref[1] + assert mappings.keys() == ref_mappings.keys() - for blockname in mappings: - assert mappings[blockname].mapping, mappings[blockname].extras == ref_mappings[blockname] def test_generate_all_self_mappings():