diff --git a/bin/martinize2 b/bin/martinize2 index d008fd60..afb67455 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -977,6 +977,9 @@ def entry(): idrs=args.idr_tune, disordered_regions=args.water_idrs ) + if 'bondedtypes' in known_force_fields[args.to_ff].variables: + LOGGER.info("Generating implicit interactions for RTP force field", type='step') + vermouth.RTPPolisher().run_system(system) # Apply position restraints if required. if args.posres != "none": diff --git a/vermouth/gmx/rtp.py b/vermouth/gmx/rtp.py index 37889232..08ec20c5 100644 --- a/vermouth/gmx/rtp.py +++ b/vermouth/gmx/rtp.py @@ -18,16 +18,13 @@ import collections import copy -import itertools import networkx as nx -from ..molecule import Block, Link, Interaction, Regex +from ..molecule import Block, Link, Interaction from .. import utils __all__ = ['read_rtp'] -from ..utils import first_alpha - # Name of the subsections in RTP files. # Names starting with a '_' are for internal use. RTP_SUBSECTIONS = ('atoms', 'bonds', 'angles', 'dihedrals', @@ -204,6 +201,7 @@ def _parse_bondedtypes(section): # Default taken from # 'src/gromacs/gmxpreprocess/resall.cpp::read_resall' in the Gromacs # source code. + defaults = _BondedTypes(bonds=1, angles=1, dihedrals=1, impropers=1, all_dihedrals=0, nrexcl=3, HH14=1, remove_dih=1) @@ -237,18 +235,6 @@ def _parse_bondedtypes(section): return bondedtypes -def _count_hydrogens(names): - return len([name for name in names if utils.first_alpha(name) == 'H']) - - -def _keep_dihedral(center, block, bondedtypes): - if (not bondedtypes.all_dihedrals) and block.has_dihedral_around(center): - return False - if bondedtypes.remove_dih and block.has_improper_around(center): - return False - return True - - def _complete_block(block, bondedtypes): """ Add information from the bondedtypes section to a block. @@ -258,49 +244,6 @@ def _complete_block(block, bondedtypes): """ block.make_edges_from_interactions() - # Generate all (missing) angles - existing_angles = {tuple(i.atoms) for i in block.get_interaction("angles")} - for atoms in block.guess_angles(): - if tuple(atoms) in existing_angles or tuple(reversed(atoms)) in existing_angles: - continue - else: - block.add_interaction("angles", atoms, []) - existing_angles.add(tuple(atoms)) - - # Generate missing dihedrals - # As pdb2gmx generates all the possible dihedral angles by default, - # RTP files are written assuming they will be generated. A RTP file - # have some control over these dihedral angles through the bondedtypes - # section. - all_dihedrals = [] - for center, dihedrals in itertools.groupby( - sorted(block.guess_dihedrals(), key=_dihedral_sorted_center), - _dihedral_sorted_center): - if _keep_dihedral(center, block, bondedtypes): - # TODO: Also sort the dihedrals by index. - # See src/gromacs/gmxpreprocess/gen_add.cpp::dcomp in the - # Gromacs source code (see version 2016.3 for instance). - atoms = sorted(dihedrals, key=_count_hydrogens)[0] - all_dihedrals.append(Interaction(atoms=atoms, parameters=[], meta={})) - # TODO: Sort the dihedrals by index - block.interactions['dihedrals'] = ( - block.interactions.get('dihedrals', []) + all_dihedrals - ) - - # https://manual.gromacs.org/current/reference-manual/file-formats.html#rtp - # Pair interactions are generated for all pairs of atoms which are separated - # by 3 bonds (except pairs of hydrogens). - # hydrogens = {n for n in block if block.nodes[n].get('element', first_alpha(n)) == 'H'} - # distances = dict(nx.shortest_path_length(block)) - # for n_idx in block: - # for n_jdx in block: - # if n_idx >= n_jdx: - # continue - # # 1-4 pairs have distance 3 - # if ((distances[n_idx].get(n_jdx, -1) == 3) and - # (bondedtypes.HH14 or not (n_idx in hydrogens and n_jdx in hydrogens))): - # block.add_interaction('pair', atoms=(n_idx, n_jdx), parameters=[1]) - # Add function types to the interaction parameters. This is done as a # post processing step to cluster as much interaction specific code # into this method. @@ -354,7 +297,8 @@ def _split_blocks_and_links(pre_blocks): for name, pre_block in pre_blocks.items(): block, link = _split_block_and_link(pre_block) blocks[name] = block - links.append(link) + if link: + links.append(link) return blocks, links @@ -397,7 +341,6 @@ def _split_block_and_link(pre_block): link_atoms = set() - do_print = pre_block.force_field.name == 'charmm36-jul2022.ff' and pre_block.name == 'ALA' for _, interactions in pre_block.interactions.items(): for interaction in interactions: # Atoms that are not defined in the atoms section but only in interactions @@ -405,53 +348,16 @@ def _split_block_and_link(pre_block): atomnames = [pre_block.nodes[n_idx].get('atomname') or n_idx for n_idx in interaction.atoms] if any(atomname[0] in '+-' for atomname in atomnames): link_atoms.update(interaction.atoms) + # Here we make 1 link consisting of all the atoms in interactions that + # contain at least one atom from a neighbouring residue, which may be an + # issue since it constrains where the link applies. link = Link(pre_block.subgraph(link_atoms)) - block = pre_block block.remove_nodes_from([n for n in pre_block if pre_block.nodes[n].get('atomname', n)[0] in '+-']) for node in block: block.nodes[node]['resname'] = block.name - # Filter the particles from neighboring residues out of the block. - # for atom in pre_block.atoms: - # if not atom['atomname'].startswith('+-'): - # atom['resname'] = pre_block.name - # block.add_atom(atom) - # link.add_node(atom['atomname']) - # - # Create the edges of the link and block based on the edges in the pre-block. - # This will create too many edges in the link, but the useless ones will be - # pruned latter. - # link.add_edges_from(pre_block.edges) - # block.add_edges_from(edge for edge in pre_block.edges - # if not any(node[0] in '+-' for node in edge)) - - # Split the interactions from the pre-block between the block (for - # intra-residue interactions) and the link (for inter-residues ones). - # The "relevant_atoms" set keeps track of what particles are - # involved in the link. This will allow to prune the link without - # iterating again through its interactions. - # relevant_atoms = set() - # for name, interactions in pre_block.interactions.items(): - # for interaction in interactions: - # for_link = any(atom[0] in '+-' for atom in interaction.atoms) - # if for_link: - # link.interactions[name].append(interaction) - # relevant_atoms.update(interaction.atoms) - # else: - # block.interactions[name].append(interaction) - - # Prune the link to keep only the edges and particles that are - # relevant. - # nodes = set(link.nodes()) - # link.remove_nodes_from(nodes - relevant_atoms) - # Some interactions do not generate nodes (impropers for instance). If a - # node is only described in one of such interactions, then the node never - # appears in the link. Here we make sure these nodes exists even if they - # are note connected. - # link.add_nodes_from(relevant_atoms) - # Atoms from a links are matched against a molecule based on its node # attributes. The name is a primary criterion, but other criteria can be # the residue name (`resname` key) or the residue order in the chain @@ -489,8 +395,6 @@ def _split_block_and_link(pre_block): for interaction in interactions: atoms = tuple(relabel_mapping[atom] for atom in interaction.atoms) if not any(link.nodes[node]['order'] for node in atoms): - if do_print: - print(f'Skipping {interaction}') continue new_interactions[name].append(Interaction( atoms=atoms, @@ -498,13 +402,9 @@ def _split_block_and_link(pre_block): meta=interaction.meta )) link.interactions = new_interactions - if do_print: - print(link.interactions) # Revert the interactions back to regular dicts to avoid creating # keys when querying them. - if do_print: - print(block.interactions['angles']) block.interactions = dict(block.interactions) link.interactions = dict(link.interactions) return block, link @@ -518,11 +418,6 @@ def _clean_lines(lines): yield splitted[0] -def _dihedral_sorted_center(atoms): - #return sorted(atoms[1:-1]) - return atoms[1:-1] - - def read_rtp(lines, force_field): """ Read blocks and links from a Gromacs RTP file to populate a force field @@ -576,12 +471,6 @@ def read_rtp(lines, force_field): # blocks and links. blocks, links = _split_blocks_and_links(pre_blocks) - # 1-4 pair link: - pair_link = Link(nx.path_graph(4)) - pair_link.symmetric = True - pair_link.add_interaction('pair', atoms=(0, 3), parameters=[1]) - if not bondedtypes.HH14: - pair_link.nodes[0]['element'] = Regex(r'[^H]') - links.append(pair_link) force_field.blocks.update(blocks) force_field.links.extend(links) + force_field.variables['bondedtypes'] = bondedtypes diff --git a/vermouth/processors/__init__.py b/vermouth/processors/__init__.py index f5a5d8ac..ecc24351 100644 --- a/vermouth/processors/__init__.py +++ b/vermouth/processors/__init__.py @@ -45,3 +45,4 @@ from .annotate_mut_mod import AnnotateMutMod from .water_bias import ComputeWaterBias from .annotate_idrs import AnnotateIDRs +from .rtp_polisher import RTPPolisher diff --git a/vermouth/processors/rtp_polisher.py b/vermouth/processors/rtp_polisher.py new file mode 100644 index 00000000..6ede2c6b --- /dev/null +++ b/vermouth/processors/rtp_polisher.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2018 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import defaultdict +import networkx as nx + +from ..molecule import Interaction +from .processor import Processor +from ..utils import first_alpha + + +def _generate_paths(graph, length): + if length < 1: + return + elif length == 1: + yield from graph.edges + yield from (e[::-1] for e in graph.edges) + return + for p in _generate_paths(graph, length-1): + for neighbour in set(graph[p[-1]]) - set(p): # Or should this be `graph[p[-1]] - p[-2]`? + yield p + (neighbour,) + return + + +def _keep_dihedral(dihedral, known_dihedral_centers, known_improper_centers, all_dihedrals, remove_dihedrals): + # https://manual.gromacs.org/current/reference-manual/file-formats.html#rtp + # gmx pdb2gmx automatically generates one proper dihedral for every + # rotatable bond, preferably on heavy atoms. When the [dihedrals] field is + # used, no other dihedrals will be generated for the bonds corresponding to + # the specified dihedrals. It is possible to put more than one dihedral on + # a rotatable bond. + # Column 5 : This controls the generation of dihedrals from the bonding. + # All possible dihedrals are generated automatically. A value of + # 1 here means that all these are retained. A value of + # 0 here requires generated dihedrals be removed if + # * there are any dihedrals on the same central atoms + # specified in the residue topology, or + # * there are other identical generated dihedrals + # sharing the same central atoms, or + # * there are other generated dihedrals sharing the + # same central bond that have fewer hydrogen atoms + # Column 8: Remove proper dihedrals if centered on the same bond + # as an improper dihedral + # https://gitlab.com/gromacs/gromacs/-/blob/main/src/gromacs/gmxpreprocess/gen_ad.cpp#L249 + center = tuple(dihedral[1:3]) + if (not all_dihedrals) and (center in known_dihedral_centers or center[::-1] in known_dihedral_centers): + return False + if remove_dihedrals and center in known_improper_centers: + return False + return True + + +def add_angles(mol, *, bond_type=5): + known_angles = {tuple(i.atoms) for i in mol.get_interaction("angles")} + for p in _generate_paths(mol, 2): + if p not in known_angles and p[::-1] not in known_angles: + mol.add_interaction('angles', atoms=p, parameters=[bond_type], meta={'comment': 'implicit angle'}) + known_angles.add(p) + + +def add_dihedrals_and_pairs(mol, *, bond_type=2, all_dihedrals=True, + HH14_pair=False, remove_dihedrals=True): + # Generate missing dihedrals and pair interactions + # As pdb2gmx generates all the possible dihedral angles by default, + # RTP files are written assuming they will be generated. A RTP file + # have some control over these dihedral angles through the bondedtypes + # section. + + explicit_dihedral_centers = {frozenset(dih.atoms[1:3]) for dih in mol.interactions.get('dihedrals', [])} + implicit_dihedrals = defaultdict(set) + explicit_improper_centers = {frozenset(dih.atoms[1:3]) for dih in mol.interactions.get('impropers', [])} + distances = dict(nx.all_pairs_shortest_path_length(mol, cutoff=3)) + + known_pairs = {frozenset(inter.atoms) for inter in mol.interactions.get('pairs', [])} + hydrogens = {n for n in mol if mol.nodes[n].get('element', first_alpha(mol.nodes[n]['atomname'])) == 'H'} + for path in _generate_paths(mol, 3): + center = frozenset(path[1:3]) + # https://manual.gromacs.org/current/reference-manual/file-formats.html#rtp + # gmx pdb2gmx automatically generates one proper dihedral for every + # rotatable bond, preferably on heavy atoms. When the [dihedrals] field is + # used, no other dihedrals will be generated for the bonds corresponding to + # the specified dihedrals. It is possible to put more than one dihedral on + # a rotatable bond. + # Column 5 : This controls the generation of dihedrals from the bonding. + # All possible dihedrals are generated automatically. A value of + # 1 here means that all these are retained. A value of + # 0 here requires generated dihedrals be removed if + # * there are any dihedrals on the same central atoms + # specified in the residue topology, or + # * there are other identical generated dihedrals + # sharing the same central atoms, or + # * there are other generated dihedrals sharing the + # same central bond that have fewer hydrogen atoms + # Column 8: Remove proper dihedrals if centered on the same bond + # as an improper dihedral + # https://gitlab.com/gromacs/gromacs/-/blob/main/src/gromacs/gmxpreprocess/gen_ad.cpp#L249 + if (not (center not in explicit_dihedral_centers and remove_dihedrals and center in explicit_improper_centers) + and path not in implicit_dihedrals[center] and path[::-1] not in implicit_dihedrals[center]): + implicit_dihedrals[center].add(path) + + pair = frozenset({path[0], path[-1]}) + if (HH14_pair or not (pair <= hydrogens)) and pair not in known_pairs and distances[path[0]][path[-1]] == 3: + # Pair interactions are generated for all pairs of atoms which are separated + # by 3 bonds (except pairs of hydrogens). + # TODO: correct for specified exclusions + mol.add_interaction('pairs', atoms=tuple(sorted(pair)), parameters=[1]) + known_pairs.add(pair) + + for center in implicit_dihedrals: + if all_dihedrals: + # Just add everything + # TODO: Also sort the dihedrals by index. + # See src/gromacs/gmxpreprocess/gen_add.cpp::dcomp in the + # Gromacs source code (see version 2016.3 for instance). + + dihedrals = [Interaction(atoms=p, parameters=[bond_type], meta={'comment': 'implicit dihedral'}) + for p in implicit_dihedrals[center]] + mol.interactions['dihedrals'] = mol.interactions.get('dihedrals', []) + dihedrals + else: + # Find the dihedral with the least amount of hydrogens + best_dihedral = min(implicit_dihedrals[center], + key=lambda p: sum(mol.nodes[idx].get('atomname', '') == 'H' for idx in p)) + mol.add_interaction('dihedrals', atoms=best_dihedral, parameters=[bond_type], meta={'comment': 'implicit dihedral'}) + + +class RTPPolisher(Processor): + def run_molecule(self, molecule): + bondedtypes = molecule.force_field.variables['bondedtypes'] + # bond_type = bondedtypes.bonds + angle_type = bondedtypes.angles + dihedral_type = bondedtypes.dihedrals + # improper_type = bondedtypes.impropers + all_dihedrals = bondedtypes.all_dihedrals + HH14_pair = bondedtypes.HH14 + remove_dihedrals = bondedtypes.remove_dih + add_angles(molecule, bond_type=angle_type) + add_dihedrals_and_pairs(molecule, bond_type=dihedral_type, all_dihedrals=all_dihedrals, HH14_pair=HH14_pair, remove_dihedrals=remove_dihedrals) + return molecule