From abd9436a2a6f9bd8f9c64be3f2e051505d5582a0 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Mon, 24 Jun 2024 15:59:24 +0200
Subject: [PATCH 01/13] Autogenerate modification auto-mappings

---
 vermouth/map_input.py            | 27 +++++++++++++++------------
 vermouth/tests/test_map_input.py | 12 +++++++++---
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/vermouth/map_input.py b/vermouth/map_input.py
index d87f66d62..9dccdba09 100644
--- a/vermouth/map_input.py
+++ b/vermouth/map_input.py
@@ -441,25 +441,23 @@ def read_mapping_directory(directory, force_fields):
     return dict(mappings)
 
 
-def generate_self_mappings(blocks):
+def generate_self_mappings(force_field):
     """
-    Generate self mappings from a collection of blocks.
+    Generate self mappings from a modification for the blocks and modifications.
 
     A self mapping is a mapping that maps a force field to itself. Applying
     such mapping is applying a neutral transformation.
 
     Parameters
     ----------
-    blocks: dict[str, networkx.Graph]
-        A dictionary of blocks with block names as keys and the blocks
-        themselves as values. The blocks must be instances of :class:`networkx.Graph`
-        with each node having an 'atomname' attribute.
+    force_field: vermouth.ForceField
+        A force field containing Blocks and Modifications
 
     Returns
     -------
-    mappings: dict[str, tuple]
-        A dictionary of mappings where the keys are the names of the blocks,
-        and the values are tuples like (mapping, weights, extra).
+    mappings: dict[Mapping]
+        A dictionary of mappings where the keys are the names of the
+        blocks/modifications, and the values are Mappings.
 
     Raises
     ------
@@ -474,11 +472,16 @@ def generate_self_mappings(blocks):
         Generate self mappings for a list of force fields.
     """
     mappings = {}
-    for name, block in blocks.items():
+    for name, block in force_field.blocks.items():
         mapping = Mapping(block, block, {idx: {idx: 1} for idx in block.nodes},
-                          {}, ff_from=block.force_field, ff_to=block.force_field,
+                          {}, ff_from=force_field, ff_to=force_field,
                           extra=[], type='block', names=(name,))
         mappings[name] = mapping
+    for name, mod in force_field.modifications.items():
+        mapping = Mapping(mod, mod, {idx: {idx: 1} for idx in mod.nodes},
+                          {}, ff_from=force_field, ff_to=force_field,
+                          extra=[], type='modification', names=name)
+        mappings[name] = mapping
     return mappings
 
 
@@ -500,7 +503,7 @@ def generate_all_self_mappings(force_fields):
     mappings = collections.defaultdict(dict)
     for force_field in force_fields:
         name = force_field.name
-        mappings[name][name] = generate_self_mappings(force_field.blocks)
+        mappings[name][name] = generate_self_mappings(force_field)
     return dict(mappings)
 
 
diff --git a/vermouth/tests/test_map_input.py b/vermouth/tests/test_map_input.py
index c21847450..5fe50fab4 100644
--- a/vermouth/tests/test_map_input.py
+++ b/vermouth/tests/test_map_input.py
@@ -522,15 +522,18 @@ def test_generate_self_mapping():
     Test that :func:`vermouth.map_input.generate_self_mappings` works as
     expected.
     """
+    force_field = vermouth.forcefield.ForceField(name='ff')
     # Build the input blocks
     blocks = {
         'A0': vermouth.molecule.Block([['AA', 'BBB'], ['BBB', 'CCCC']]),
         'B1': vermouth.molecule.Block([['BBB', 'CCCC'], ['BBB', 'E']]),
     }
+    force_field.blocks = blocks
     for name, block in blocks.items():
         block.name = name
         for atomname, node in block.nodes.items():
             node['atomname'] = atomname
+
     # Build the expected output
     ref_mappings = {
         'A0': (
@@ -547,10 +550,13 @@ def test_generate_self_mapping():
         ),
     }
     # Actually test
-    mappings = vermouth.map_input.generate_self_mappings(blocks)
+    mappings = vermouth.map_input.generate_self_mappings(force_field)
+    for name, map in mappings.items():
+        ref = ref_mappings[name]
+        assert map.mapping == ref[0]
+        assert map.block_to.extra == ref[1]
+
     assert mappings.keys() == ref_mappings.keys()
-    for blockname in mappings:
-        assert mappings[blockname].mapping, mappings[blockname].extras == ref_mappings[blockname]
 
 
 def test_generate_all_self_mappings():

From 37baebf1344b418793405b4a760c0813d3599140 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Mon, 24 Jun 2024 16:03:12 +0200
Subject: [PATCH 02/13] Fix docstring formatting

---
 vermouth/map_input.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vermouth/map_input.py b/vermouth/map_input.py
index 9dccdba09..0899b69f0 100644
--- a/vermouth/map_input.py
+++ b/vermouth/map_input.py
@@ -450,12 +450,12 @@ def generate_self_mappings(force_field):
 
     Parameters
     ----------
-    force_field: vermouth.ForceField
+    force_field: vermouth.forcefield.ForceField
         A force field containing Blocks and Modifications
 
     Returns
     -------
-    mappings: dict[Mapping]
+    mappings: dict[~vermouth.map_parser.Mapping]
         A dictionary of mappings where the keys are the names of the
         blocks/modifications, and the values are Mappings.
 

From bc77b9de1ece62632d2c6e5422720a4e783946cc Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Wed, 26 Jun 2024 11:56:01 +0200
Subject: [PATCH 03/13] FIx modification auto-mapping names

---
 vermouth/map_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vermouth/map_input.py b/vermouth/map_input.py
index 0899b69f0..c5e41eaf4 100644
--- a/vermouth/map_input.py
+++ b/vermouth/map_input.py
@@ -480,7 +480,7 @@ def generate_self_mappings(force_field):
     for name, mod in force_field.modifications.items():
         mapping = Mapping(mod, mod, {idx: {idx: 1} for idx in mod.nodes},
                           {}, ff_from=force_field, ff_to=force_field,
-                          extra=[], type='modification', names=name)
+                          extra=[], type='modification', names=(name,))
         mappings[name] = mapping
     return mappings
 

From 1aee1320e8cced15d1604051e4dd6e0935188146 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Wed, 26 Jun 2024 11:56:26 +0200
Subject: [PATCH 04/13] Improve debug output of write_itp

---
 vermouth/gmx/itp.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/vermouth/gmx/itp.py b/vermouth/gmx/itp.py
index 821232c31..7f9640891 100644
--- a/vermouth/gmx/itp.py
+++ b/vermouth/gmx/itp.py
@@ -18,6 +18,10 @@
 
 import copy
 import itertools
+from collections import defaultdict
+from ..log_helpers import StyleAdapter, get_logger
+
+LOGGER = StyleAdapter(get_logger(__name__))
 
 __all__ = ['write_molecule_itp', ]
 
@@ -111,10 +115,16 @@ def write_molecule_itp(molecule, outfile, header=(), moltype=None,
     # Make sure the molecule contains the information required to write the
     # [atoms] section. The charge and mass can be ommited, if so gromacs take
     # them from the [atomtypes] section of the ITP file.
+    atoms_with_issues = defaultdict(list)
     for attribute in ('atype', 'resid', 'resname', 'atomname',
                       'charge_group'):
-        if not all([attribute in atom for _, atom in molecule.atoms]):
-            raise ValueError('Not all atom have a {}.'.format(attribute))
+        for _, atom in molecule.atoms:
+            if attribute not in atom:
+                atoms_with_issues[attribute].append(atom)
+    if atoms_with_issues:
+        for attr, atoms in atoms_with_issues.items():
+            LOGGER.error('The following atoms do not have a {}: {}', attr, atoms)
+        raise ValueError("Some atoms are missing required attributes.")
 
     # Get the maximum length of each atom field so we can align the fields.
     # Atom indexes are written as a consecutive series starting from 1.

From dc44f4488e1432188a3c64316ab42e415f48c10a Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Wed, 26 Jun 2024 11:56:48 +0200
Subject: [PATCH 05/13] Improve debug output of known modification mappings

---
 vermouth/processors/do_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index 38b25fd18..f92a115ff 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -274,7 +274,7 @@ def modification_matches(molecule, mappings):
             LOGGER.warning("Can't find modification mappings for the "
                            "modifications {}. The following modification "
                            "mappings are known: {}",
-                           list(group), known_mod_mappings,
+                           list(group), sorted(known_mod_mappings.keys()),
                            type='unmapped-atom')
             continue
         needed_mod_mappings.update(covered_by)

From 08fa681c801ee8d20494b296c7d47ffe38e97de5 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Wed, 26 Jun 2024 17:17:01 +0200
Subject: [PATCH 06/13] Make node attribute propagation a bit more sane and
 make sure all nodes get a 'charge_group'

---
 bin/martinize2                    |  2 +-
 vermouth/processors/do_mapping.py | 66 ++++++++++++++++---------------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/bin/martinize2 b/bin/martinize2
index 38612bb50..52ccf24fe 100755
--- a/bin/martinize2
+++ b/bin/martinize2
@@ -180,7 +180,7 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
         to_ff=to_ff,
         delete_unknown=delete_unknown,
         attribute_keep=("cgsecstruct", "chain"),
-        attribute_must=("resname",),
+        attribute_must=("resname", "charge_group"),
         attribute_stash=("resid",),
     ).run_system(system)
     LOGGER.info("Averaging the coordinates.", type="step")
diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index f92a115ff..81c674c81 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -541,8 +541,8 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
         `molecule`.
     attribute_stash: tuple[str]
         The attributes that will always be transferred from the input molecule
-        to the produced graph, but prefixed with _old_.Thus they are new attributes
-        and are not conflicting with already defined attributes.
+        to the produced graph, but prefixed with _old_. Thus, they are new
+        attributes and are not conflicting with already defined attributes.
 
 
     Returns
@@ -632,48 +632,49 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
     to_remove = set()
     for out_idx in out_to_mol:
         mol_idxs = out_to_mol[out_idx].keys()
-        # Keep track of what bead comes from where
+
+        # Time to collect the attributes to set. Start by grabbing all the
+        # attributes we already have.
+        node_attrs = dict(graph_out.nodes[out_idx])
+        # These attributes the new node *must* have. We'll replace these with
+        # attrs from molecule if we can later.
+        for attr_name in attribute_must:
+            if attr_name not in node_attrs:
+                node_attrs[attr_name] = None
+        # In all cases, keep track of what bead comes from where
         subgraph = molecule.subgraph(mol_idxs)
-        graph_out.nodes[out_idx]['graph'] = subgraph
+        node_attrs['graph'] = subgraph
         weights = out_to_mol[out_idx]
-        graph_out.nodes[out_idx]['mapping_weights'] = weights
+        node_attrs['mapping_weights'] = weights
 
+        # Now we find attribute values from molecule.
         if out_idx in all_references:
             ref_idx = all_references[out_idx]
-            new_attrs = attrs_from_node(molecule.nodes[ref_idx],
-                                        attribute_keep+attribute_must+attribute_stash)
-            for attr, val in new_attrs.items():
-                # Attrs in attribute_keep we always transfer, those in
-                # attribute_must we transfer only if they're not already in the
-                # created node
-                if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
-                    graph_out.nodes[out_idx].update(new_attrs)
-                if attr in attribute_stash:
-                    graph_out.nodes[out_idx]["_old_"+attr] = val
+            mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_must + attribute_stash)
+            for attr, val in mol_attrs.items():
+                if attr in attribute_keep:
+                    node_attrs[attr] = val
+                elif attr in attribute_stash:
+                    node_attrs["_old_" + attr] = val
+                elif attr not in graph_out.nodes[out_idx]:
+                    node_attrs[attr] = val
         else:
             attrs = defaultdict(list)
+            attrs_not_sane = []
             for mol_idx in mol_idxs:
-                new_attrs = attrs_from_node(molecule.nodes[mol_idx],
-                                            attribute_keep+attribute_must+attribute_stash)
-                for attr, val in new_attrs.items():
+                mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_must + attribute_stash)
+                for attr, val in mol_attrs.items():
                     attrs[attr].append(val)
-            attrs_not_sane = []
-            for attr, vals in attrs.items():
-                if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
-                    if vals:
-                        graph_out.nodes[out_idx][attr] = vals[0]
-                    else:
-                        # No nodes hat the attribute.
-                        graph_out.nodes[out_idx][attr] = None
-                if attr in attribute_stash:
-                    if vals:
-                        graph_out.nodes[out_idx]["_old_"+attr] = vals[0]
-                    else:
-                        # No nodes hat the attribute.
-                        graph_out.nodes[out_idx]["_old_"+attr] = None
 
+            for attr, vals in attrs.items():
                 if not are_all_equal(vals):
                     attrs_not_sane.append(attr)
+                if attr in attribute_keep:
+                    node_attrs[attr] = vals[0]
+                elif attr in attribute_stash:
+                    node_attrs["_old_" + attr] = vals[0]
+                elif attr not in graph_out.nodes[out_idx]:
+                    node_attrs[attr] = vals[0]
 
             if attrs_not_sane:
                 LOGGER.warning('The attributes {} for atom {} are going to'
@@ -682,6 +683,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
                                attrs_not_sane,
                                format_atom_string(graph_out.nodes[out_idx]),
                                type='inconsistent-data')
+        graph_out.nodes[out_idx].update(node_attrs)
         if graph_out.nodes[out_idx].get('atomname', '') is None:
             to_remove.add(out_idx)
 

From e430b72d560f4f145b29bc589c15405aadb99b55 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:17:12 +0200
Subject: [PATCH 07/13] Find missing required node attributes at nearby nodes
 if needed

---
 bin/martinize2                    |  2 +-
 vermouth/processors/do_mapping.py | 53 +++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/bin/martinize2 b/bin/martinize2
index 52ccf24fe..c70ed44e2 100755
--- a/bin/martinize2
+++ b/bin/martinize2
@@ -180,7 +180,7 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
         to_ff=to_ff,
         delete_unknown=delete_unknown,
         attribute_keep=("cgsecstruct", "chain"),
-        attribute_must=("resname", "charge_group"),
+        attribute_must=("resname", "charge_group", "atype"),
         attribute_stash=("resid",),
     ).run_system(system)
     LOGGER.info("Averaging the coordinates.", type="step")
diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index 81c674c81..48a7692a2 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -18,6 +18,7 @@
 molecule.
 """
 from collections import defaultdict
+from copy import deepcopy
 from itertools import product, combinations
 
 import networkx as nx
@@ -635,12 +636,8 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
 
         # Time to collect the attributes to set. Start by grabbing all the
         # attributes we already have.
-        node_attrs = dict(graph_out.nodes[out_idx])
-        # These attributes the new node *must* have. We'll replace these with
-        # attrs from molecule if we can later.
-        for attr_name in attribute_must:
-            if attr_name not in node_attrs:
-                node_attrs[attr_name] = None
+        node_attrs = deepcopy(graph_out.nodes[out_idx])
+
         # In all cases, keep track of what bead comes from where
         subgraph = molecule.subgraph(mol_idxs)
         node_attrs['graph'] = subgraph
@@ -652,11 +649,11 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
             ref_idx = all_references[out_idx]
             mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_must + attribute_stash)
             for attr, val in mol_attrs.items():
-                if attr in attribute_keep:
-                    node_attrs[attr] = val
-                elif attr in attribute_stash:
+                # This is if/if rather than if/elif on purpose. It could be that
+                # an attribute needs to be both stashed and kept
+                if attr in attribute_stash:
                     node_attrs["_old_" + attr] = val
-                elif attr not in graph_out.nodes[out_idx]:
+                if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
                     node_attrs[attr] = val
         else:
             attrs = defaultdict(list)
@@ -669,11 +666,11 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
             for attr, vals in attrs.items():
                 if not are_all_equal(vals):
                     attrs_not_sane.append(attr)
-                if attr in attribute_keep:
-                    node_attrs[attr] = vals[0]
-                elif attr in attribute_stash:
+                # This is if/if rather than if/elif on purpose. It could be that
+                # an attribute needs to be both stashed and kept
+                if attr in attribute_stash:
                     node_attrs["_old_" + attr] = vals[0]
-                elif attr not in graph_out.nodes[out_idx]:
+                if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
                     node_attrs[attr] = vals[0]
 
             if attrs_not_sane:
@@ -759,6 +756,34 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
                             for idx in other_uncovered],
                            type='unmapped-atom')
 
+    # "None to one" mapping - Strictly speaking this cannot happen, but it could
+    # be that output particles map to only particles that were not present in
+    # the input structure. This probably causes massive issues downstream.
+    # For now, flag a sane warning, and at the same time transfer missing "must"
+    # attributes from nearby atoms.
+    for node in graph_out:
+        for attr in attribute_must:
+            if attr not in graph_out.nodes[node]:
+                # We can skip `node`, since if it had the attribute required we
+                # wouldn't be in this mess to begin with.
+                # Set a depth_limit to keep the runtime within bounds.
+                close_nodes = (v for (u, v) in nx.bfs_edges(graph_out, source=node, depth_limit=3))
+                for template_node in close_nodes:
+                    if attr in graph_out.nodes[template_node]:
+                        graph_out.nodes[node][attr] = deepcopy(graph_out.nodes[template_node][attr])
+                        LOGGER.warning("Atom {} did not have a {}, so we took "
+                                       "it from atom {} instead.",
+                                       format_atom_string(graph_out.nodes[node]),
+                                       attr,
+                                       format_atom_string(graph_out.nodes[template_node]),
+                                       type="inconsistent-data")
+                        break
+                else:  # no break, attribute not found
+                    LOGGER.warning("Atom {} does not have a {}, and we couldn't "
+                                   "find a nearby atom that does.",
+                                   format_atom_string(graph_out.nodes[node]),
+                                   attr, type="inconsistent-data")
+
     for interaction_type in modified_interactions:
         for atoms, modifications in modified_interactions[interaction_type].items():
             if len(modifications) != 1:

From 3cddb63ed90d0546321c80456599b11df0f0984b Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:39:06 +0200
Subject: [PATCH 08/13] Tweak attribute propagation for tests

---
 bin/martinize2                    | 4 ++--
 vermouth/processors/do_mapping.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/martinize2 b/bin/martinize2
index c70ed44e2..d527c4ace 100755
--- a/bin/martinize2
+++ b/bin/martinize2
@@ -179,8 +179,8 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
         mappings=mappings,
         to_ff=to_ff,
         delete_unknown=delete_unknown,
-        attribute_keep=("cgsecstruct", "chain"),
-        attribute_must=("resname", "charge_group", "atype"),
+        attribute_keep=("cgsecstruct", "resname", "chain"),
+        attribute_must=("resname", "atype"),
         attribute_stash=("resid",),
     ).run_system(system)
     LOGGER.info("Averaging the coordinates.", type="step")
diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index 48a7692a2..d9bd9aef6 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -647,7 +647,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
         # Now we find attribute values from molecule.
         if out_idx in all_references:
             ref_idx = all_references[out_idx]
-            mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_must + attribute_stash)
+            mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_stash)
             for attr, val in mol_attrs.items():
                 # This is if/if rather than if/elif on purpose. It could be that
                 # an attribute needs to be both stashed and kept
@@ -659,7 +659,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(),
             attrs = defaultdict(list)
             attrs_not_sane = []
             for mol_idx in mol_idxs:
-                mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_must + attribute_stash)
+                mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_stash)
                 for attr, val in mol_attrs.items():
                     attrs[attr].append(val)
 

From 9e53e174cc8d388022f2bf0d2e3a3ccc283095db Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:39:57 +0200
Subject: [PATCH 09/13] Make sure all modification nodes get their charge_group

---
 vermouth/ffinput.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vermouth/ffinput.py b/vermouth/ffinput.py
index d52859957..ff8d77efe 100644
--- a/vermouth/ffinput.py
+++ b/vermouth/ffinput.py
@@ -166,12 +166,14 @@ def finalize_section(self, previous_section, ended_section):
             self.current_block.citations.update(self.citations)
             self.current_block.make_edges_from_interactions()
             self.force_field.blocks[self.current_block.name] = self.current_block
+            self.current_block = None
 
         if self.current_link is not None:
             # add FF wide citations
             self.current_link.citations.update(self.citations)
             self.current_link.make_edges_from_interactions()
             self.force_field.links.append(self.current_link)
+            self.current_link = None
 
         if self.current_modification is not None:
             # add FF wide citations
@@ -180,6 +182,14 @@ def finalize_section(self, previous_section, ended_section):
                              self.current_modification.name, self.force_field.name)
             self.current_modification.citations.update(self.citations)
             self.force_field.modifications[self.current_modification.name] = self.current_modification
+            # Add charge_group attribute to nodes that don't have one.
+            charge_group = 1
+            for node_idx in sorted(self.current_modification):
+                node = self.current_modification.nodes[node_idx]
+                if 'charge_group' not in node:
+                    node['charge_group'] = charge_group
+                charge_group = node['charge_group'] + 1
+            self.current_modification = None
 
     def get_context(self, context_type=''):
         possible_contexts = {

From 170818a02345f5128c6e78722957fe833f0f9b7b Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 15:50:23 +0200
Subject: [PATCH 10/13] Make modification charge_groups slightly better

---
 vermouth/ffinput.py               |  7 -------
 vermouth/processors/do_mapping.py | 11 +++++++++++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/vermouth/ffinput.py b/vermouth/ffinput.py
index ff8d77efe..84ec54b9e 100644
--- a/vermouth/ffinput.py
+++ b/vermouth/ffinput.py
@@ -182,13 +182,6 @@ def finalize_section(self, previous_section, ended_section):
                              self.current_modification.name, self.force_field.name)
             self.current_modification.citations.update(self.citations)
             self.force_field.modifications[self.current_modification.name] = self.current_modification
-            # Add charge_group attribute to nodes that don't have one.
-            charge_group = 1
-            for node_idx in sorted(self.current_modification):
-                node = self.current_modification.nodes[node_idx]
-                if 'charge_group' not in node:
-                    node['charge_group'] = charge_group
-                charge_group = node['charge_group'] + 1
             self.current_modification = None
 
     def get_context(self, context_type=''):
diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index d9bd9aef6..ac43058ea 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -415,6 +415,8 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
     mod_to_out = {}
     # Some nodes of modification will already exist. The question is
     # which, and which index they have in graph_out.
+    # We'll store these anchors for now
+    anchors = set()
     for mod_idx in modification:
         if not node_should_exist(modification, mod_idx):
             # Node does not exist yet.
@@ -446,12 +448,21 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
                 raise ValueError("No node found in molecule with "
                                  "atomname {}".format(modification.nodes[mod_idx]['atomname']))
             # Undefined loop variable is guarded against by the else-raise above
+            anchors.add(mod_idx)
             mod_to_out[mod_idx] = out_idx  # pylint: disable=undefined-loop-variable
             graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable
         graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', [])
         if modification not in graph_out.nodes[out_idx]['modifications']:
             graph_out.nodes[out_idx]['modifications'].append(modification)
 
+    # FIXME Jank here to ensure the charge_group attributes look reasonable
+    charge_group_start = max(graph_out.nodes[mod_to_out[idx]]['charge_group'] for idx in anchors)
+    for charge_group, mod_idx in enumerate(modification, charge_group_start):
+        out_idx = mod_to_out[mod_idx]
+        if 'charge_group' not in graph_out.nodes[out_idx]:
+            print(f'{format_atom_string(graph_out.nodes[out_idx])} {charge_group}')
+            graph_out.nodes[out_idx]['charge_group'] = charge_group
+
     for mol_idx in mol_to_mod:
         for mod_idx, weight in mol_to_mod[mol_idx].items():
             out_idx = mod_to_out[mod_idx]

From 533eba1cb38ca4640a2c530c7fd46d54a974f24d Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 16:10:25 +0200
Subject: [PATCH 11/13] Unbreak ffinput test

---
 vermouth/ffinput.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vermouth/ffinput.py b/vermouth/ffinput.py
index 84ec54b9e..d52859957 100644
--- a/vermouth/ffinput.py
+++ b/vermouth/ffinput.py
@@ -166,14 +166,12 @@ def finalize_section(self, previous_section, ended_section):
             self.current_block.citations.update(self.citations)
             self.current_block.make_edges_from_interactions()
             self.force_field.blocks[self.current_block.name] = self.current_block
-            self.current_block = None
 
         if self.current_link is not None:
             # add FF wide citations
             self.current_link.citations.update(self.citations)
             self.current_link.make_edges_from_interactions()
             self.force_field.links.append(self.current_link)
-            self.current_link = None
 
         if self.current_modification is not None:
             # add FF wide citations
@@ -182,7 +180,6 @@ def finalize_section(self, previous_section, ended_section):
                              self.current_modification.name, self.force_field.name)
             self.current_modification.citations.update(self.citations)
             self.force_field.modifications[self.current_modification.name] = self.current_modification
-            self.current_modification = None
 
     def get_context(self, context_type=''):
         possible_contexts = {

From dfadbbbb7e7c8d3443e309c9d50e351cb1da1313 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 28 Jun 2024 16:10:50 +0200
Subject: [PATCH 12/13] add charge groups to test_apply_mod_mapping nodes

---
 vermouth/processors/do_mapping.py | 3 +--
 vermouth/tests/test_do_mapping.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index ac43058ea..58f8a7886 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -456,11 +456,10 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
             graph_out.nodes[out_idx]['modifications'].append(modification)
 
     # FIXME Jank here to ensure the charge_group attributes look reasonable
-    charge_group_start = max(graph_out.nodes[mod_to_out[idx]]['charge_group'] for idx in anchors)
+    charge_group_start = max(graph_out.nodes[mod_to_out[idx]].get('charge_group', 1) for idx in anchors) if anchors else 1
     for charge_group, mod_idx in enumerate(modification, charge_group_start):
         out_idx = mod_to_out[mod_idx]
         if 'charge_group' not in graph_out.nodes[out_idx]:
-            print(f'{format_atom_string(graph_out.nodes[out_idx])} {charge_group}')
             graph_out.nodes[out_idx]['charge_group'] = charge_group
 
     for mol_idx in mol_to_mod:
diff --git a/vermouth/tests/test_do_mapping.py b/vermouth/tests/test_do_mapping.py
index 3916af84e..b9dcc084f 100644
--- a/vermouth/tests/test_do_mapping.py
+++ b/vermouth/tests/test_do_mapping.py
@@ -475,7 +475,7 @@ def test_apply_mod_mapping(modified_molecule, modifications):
     """
     graph_out = Molecule(force_field=FF_UNIVERSAL)
     graph_out.add_nodes_from([
-        (0, {'atomname': 'A', 'resid': 1})
+        (0, {'atomname': 'A', 'resid': 1, 'charge_group': 1})
     ])
     mol_to_out = {0: {0: 1}}
     out_to_mol = {0: {0: 1}}
@@ -492,7 +492,7 @@ def test_apply_mod_mapping(modified_molecule, modifications):
     assert mol_to_out == {0: {0: 1}, 1: {1: 1}}
     assert out_to_mol == {0: {0: 1}, 1: {1: 1}}
 
-    graph_out.add_node(2, atomname='J', resid=2)
+    graph_out.add_node(2, atomname='J', resid=2, charge_group=2)
     mol_to_out[16] = {2: 1}
     out_to_mol[2] = {16: 1}
 

From 05fd3268e7a46ab4764053a6782bae59de8622a5 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 5 Jul 2024 14:04:58 +0200
Subject: [PATCH 13/13] Make PTM atoms also apply their 'replace' attribute

---
 vermouth/processors/do_mapping.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py
index 58f8a7886..bf30479c6 100644
--- a/vermouth/processors/do_mapping.py
+++ b/vermouth/processors/do_mapping.py
@@ -426,6 +426,7 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
                 out_idx = max(graph_out) + 1
             mod_to_out[mod_idx] = out_idx
             graph_out.add_node(out_idx, **modification.nodes[mod_idx])
+            graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {}))
         else:
             # Node should already exist
             # We need to find the out_index of this node. Since the