From 9d23067e3b4299cc8484ce3c4dd623384fbbb5d9 Mon Sep 17 00:00:00 2001 From: bjkreitz Date: Mon, 31 Jul 2023 11:39:41 -0400 Subject: [PATCH 1/9] adding resonance for adsorbates incorporate feedback Add multidentate check to fragment Fix generate_adsorbate_conjugate_resonance_structures See discussion at https://github.com/ReactionMechanismGenerator/RMG-Py/pull/2511#discussion_r1299285925 Alon pointed out a valid pathway and Bjarne fixed it. incorporate review feedback make cosmetic adjustments to the code Tweaking pathfinder. We had declared 'paths' in the Cython declaration, so using that name (also it is clearer than path, since it is a list of paths). And fixing docstrings. Tweak docstring and linebreaks in molecule/resonance.py --- rmgpy/molecule/fragment.py | 13 ++++ rmgpy/molecule/molecule.pxd | 2 + rmgpy/molecule/molecule.py | 10 +++ rmgpy/molecule/pathfinder.pxd | 4 ++ rmgpy/molecule/pathfinder.py | 55 ++++++++++++++++ rmgpy/molecule/resonance.pxd | 6 ++ rmgpy/molecule/resonance.py | 120 +++++++++++++++++++++++++++++++++- 7 files changed, 209 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/fragment.py b/rmgpy/molecule/fragment.py index 6e5c10652c..a4e3e98b64 100644 --- a/rmgpy/molecule/fragment.py +++ b/rmgpy/molecule/fragment.py @@ -1240,6 +1240,19 @@ def check_in_ring(self, rd_mol, mapped_atom_idx): return True return False + def is_multidentate(self): + """ + Return ``True`` if the adsorbate contains at least two binding sites, + or ``False`` otherwise. + """ + surface_sites = 0 + for atom in self.vertices: + if atom.is_surface_site(): + surface_sites += 1 + if surface_sites >= 2: + return True + return False + def from_smiles_like_string(self, smiles_like_string): smiles = smiles_like_string diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd index 335486f76f..adc0c4a47f 100644 --- a/rmgpy/molecule/molecule.pxd +++ b/rmgpy/molecule/molecule.pxd @@ -301,6 +301,8 @@ cdef class Molecule(Graph): cpdef list get_adatoms(self) + cpdef bint is_multidentate(self) + cpdef list get_desorbed_molecules(self) cdef atom_id_counter diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 3f647c336a..09b8cd43ce 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -2864,6 +2864,16 @@ def get_surface_sites(self): cython.declare(atom=Atom) return [atom for atom in self.atoms if atom.is_surface_site()] + def is_multidentate(self): + """ + Return ``True`` if the adsorbate contains at least two binding sites, + or ``False`` otherwise. + """ + cython.declare(atom=Atom) + if len([atom for atom in self.atoms if atom.is_surface_site()])>=2: + return True + return False + def get_adatoms(self): """ Get a list of adatoms in the molecule. diff --git a/rmgpy/molecule/pathfinder.pxd b/rmgpy/molecule/pathfinder.pxd index 80e680d4e2..469417983f 100644 --- a/rmgpy/molecule/pathfinder.pxd +++ b/rmgpy/molecule/pathfinder.pxd @@ -58,3 +58,7 @@ cpdef list find_N5dc_radical_delocalization_paths(Vertex atom1) cpdef bint is_atom_able_to_gain_lone_pair(Vertex atom) cpdef bint is_atom_able_to_lose_lone_pair(Vertex atom) + +cpdef list find_adsorbate_delocalization_paths(Vertex atom1) + +cpdef list find_adsorbate_conjugate_delocalization_paths(Vertex atom1) \ No newline at end of file diff --git a/rmgpy/molecule/pathfinder.py b/rmgpy/molecule/pathfinder.py index c540b80d46..ffaca23861 100644 --- a/rmgpy/molecule/pathfinder.py +++ b/rmgpy/molecule/pathfinder.py @@ -480,3 +480,58 @@ def is_atom_able_to_lose_lone_pair(atom): return (((atom.is_nitrogen() or atom.is_sulfur()) and atom.lone_pairs in [1, 2, 3]) or (atom.is_oxygen() and atom.lone_pairs in [2, 3]) or atom.is_carbon() and atom.lone_pairs == 1) + + +def find_adsorbate_delocalization_paths(atom1): + """ + Find all multidentate adsorbates which have a bonding configuration X-C-C-X. + Examples: + + - XCXC, XCHXCH, XCXCH, where X is the surface site. The adsorption site X + is always placed on the left-hand side of the adatom and every adatom + is bonded to only one surface site X. + + In this transition atom1 and atom4 are surface sites while atom2 and atom3 + are carbon or nitrogen atoms. + """ + cython.declare(paths=list, atom2=Vertex, atom3=Vertex, atom4=Vertex, bond12=Edge, bond23=Edge, bond34=Edge) + + paths = [] + if atom1.is_surface_site(): + for atom2, bond12 in atom1.edges.items(): + if atom2.is_carbon(): + for atom3, bond23 in atom2.edges.items(): + if atom3.is_carbon(): + for atom4, bond34 in atom3.edges.items(): + if atom4.is_surface_site(): + paths.append([atom1, atom2, atom3, atom4, bond12, bond23, bond34]) + return paths + + +def find_adsorbate_conjugate_delocalization_paths(atom1): + """ + Find all multidentate adsorbates which have a bonding configuration X-C-C-C-X. + Examples: + + - XCHCHXCH/XCHCHXC, where X is the surface site. The adsorption site X + is always placed on the left-hand side of the adatom and every adatom + is bonded to only one surface site X. + + In this transition atom1 and atom5 are surface sites while atom2, atom3, + and atom4 are carbon or nitrogen atoms. + """ + + cython.declare(paths=list, atom2=Vertex, atom3=Vertex, atom4=Vertex, atom5=Vertex, bond12=Edge, bond23=Edge, bond34=Edge, bond45=Edge) + + paths = [] + if atom1.is_surface_site(): + for atom2, bond12 in atom1.edges.items(): + if atom2.is_carbon(): + for atom3, bond23 in atom2.edges.items(): + if atom3.is_carbon(): + for atom4, bond34 in atom3.edges.items(): + if atom2 is not atom4 and atom4.is_carbon(): + for atom5, bond45 in atom4.edges.items(): + if atom5.is_surface_site(): + paths.append([atom1, atom2, atom3, atom4, atom5, bond12, bond23, bond34, bond45]) + return paths diff --git a/rmgpy/molecule/resonance.pxd b/rmgpy/molecule/resonance.pxd index e3b01db841..7688b29eff 100644 --- a/rmgpy/molecule/resonance.pxd +++ b/rmgpy/molecule/resonance.pxd @@ -63,3 +63,9 @@ cpdef list generate_clar_structures(Graph mol, bint save_order=?) cpdef list _clar_optimization(Graph mol, list constraints=?, max_num=?, save_order=?) cpdef list _clar_transformation(Graph mol, list aromatic_ring) + +cpdef list generate_adsorbate_shift_down_resonance_structures(Graph mol) + +cpdef list generate_adsorbate_shift_up_resonance_structures(Graph mol) + +cpdef list generate_adsorbate_conjugate_resonance_structures(Graph mol) \ No newline at end of file diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index b207568e71..be0456f519 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -49,6 +49,10 @@ - ``generate_kekule_structure``: generate a single Kekule structure for an aromatic compound (single/double bond form) - ``generate_opposite_kekule_structure``: for monocyclic aromatic species, rotate the double bond assignment - ``generate_clar_structures``: generate all structures with the maximum number of pi-sextet assignments +- Multidentate adsorbates only + - ``generate_adsorbate_shift_down_resonance_structures``: shift 2 electrons from a C=/#C bond to the X-C bond + - ``generate_adsorbate_shift_up_resonance_structures``: shift 2 electrons from a X=/#C bond to a C-C bond + - ``generate_adsorbate_conjugate_resonance_structures``: shift 2 electrons in a conjugate pi system for bridged X-C-C-C-X adsorbates """ import logging @@ -87,6 +91,9 @@ def populate_resonance_algorithms(features=None): generate_aryne_resonance_structures, generate_kekule_structure, generate_clar_structures, + generate_adsorbate_shift_down_resonance_structures, + generate_adsorbate_shift_up_resonance_structures, + generate_adsorbate_conjugate_resonance_structures ] else: # If the molecule is aromatic, then radical resonance has already been considered @@ -110,7 +117,10 @@ def populate_resonance_algorithms(features=None): # solution. A more holistic approach would be to identify these cases in generate_resonance_structures, # and pass a list of forbidden atom ID's to find_lone_pair_multiple_bond_paths. method_list.append(generate_lone_pair_multiple_bond_resonance_structures) - + if features['is_multidentate']: + method_list.append(generate_adsorbate_shift_down_resonance_structures) + method_list.append(generate_adsorbate_shift_up_resonance_structures) + method_list.append(generate_adsorbate_conjugate_resonance_structures) return method_list @@ -130,6 +140,7 @@ def analyze_molecule(mol, save_order=False): 'is_aryl_radical': False, 'hasNitrogenVal5': False, 'hasLonePairs': False, + 'is_multidentate': mol.is_multidentate(), } if features['is_cyclic']: @@ -1117,3 +1128,110 @@ def _clar_transformation(mol, aromatic_ring): for bond in bond_list: bond.order = 1.5 + + +def generate_adsorbate_shift_down_resonance_structures(mol): + """ + Generate all of the resonance structures formed by the shift a pi bond between two C-C atoms to both X-C bonds. + Example XCHXCH: [X]C=C[X] <=> [X]=CC=[X] + (where '=' denotes a double bond) + """ + cython.declare(structures=list, paths=list, index=cython.int, structure=Graph) + cython.declare(atom=Vertex, atom1=Vertex, atom2=Vertex, atom3=Vertex, atom4=Vertex, bond12=Edge, bond23=Edge, bond34=Edge) + cython.declare(v1=Vertex, v2=Vertex) + + structures = [] + if mol.is_multidentate(): + for atom in mol.vertices: + paths = pathfinder.find_adsorbate_delocalization_paths(atom) + for atom1, atom2, atom3, atom4, bond12, bond23, bond34 in paths: + if bond23.is_single(): + continue + else: + bond12.increment_order() + bond23.decrement_order() + bond34.increment_order() + structure = mol.copy(deep=True) + bond12.decrement_order() + bond23.increment_order() + bond34.decrement_order() + try: + structure.update_atomtypes(log_species=False) + except AtomTypeError: + pass + else: + structures.append(structure) + return structures + + +def generate_adsorbate_shift_up_resonance_structures(mol): + """ + Generate all of the resonance structures formed by the shift of two electrons from X-C bonds to increase the bond + order between two C-C atoms by 1. + Example XCHXCH: [X]=CC=[X] <=> [X]C=C[X] + (where '=' denotes a double bond, '#' denotes a triple bond) + """ + cython.declare(structures=list, paths=list, index=cython.int, structure=Graph) + cython.declare(atom=Vertex, atom1=Vertex, atom2=Vertex, atom3=Vertex, atom4=Vertex, bond12=Edge, bond23=Edge, bond34=Edge) + cython.declare(v1=Vertex, v2=Vertex) + + structures = [] + if mol.is_multidentate(): + for atom in mol.vertices: + paths = pathfinder.find_adsorbate_delocalization_paths(atom) + for atom1, atom2, atom3, atom4, bond12, bond23, bond34 in paths: + if (bond12.is_double() and bond23.is_single() and bond34.is_double()) or (bond23.is_double() and bond12.is_double() and bond34.is_double()) or (bond12.is_triple() and bond23.is_single() and bond34.is_triple()) or (bond12.is_triple() and bond23.is_single() and bond34.is_double()): + bond12.decrement_order() + bond23.increment_order() + bond34.decrement_order() + structure = mol.copy(deep=True) + bond12.increment_order() + bond23.decrement_order() + bond34.increment_order() + try: + structure.update_atomtypes(log_species=False) + except AtomTypeError: + pass + else: + structures.append(structure) + return structures + + +def generate_adsorbate_conjugate_resonance_structures(mol): + """ + Generate all of the resonance structures formed by the shift of two + electrons in a conjugated pi bond system of a bidentate adsorbate + with a bridging atom in between. + + Example XCHCHXC: [X]#CC=C[X] <=> [X]=C=CC=[X] + (where '#' denotes a triple bond, '=' denotes a double bond) + """ + cython.declare(structures=list, paths=list, index=cython.int, structure=Graph) + cython.declare(atom=Vertex, atom1=Vertex, atom2=Vertex, atom3=Vertex, atom4=Vertex, atom5=Vertex, bond12=Edge, bond23=Edge, bond34=Edge, bond45=Edge) + cython.declare(v1=Vertex, v2=Vertex) + + structures = [] + if mol.is_multidentate(): + for atom in mol.vertices: + paths = pathfinder.find_adsorbate_conjugate_delocalization_paths(atom) + for atom1, atom2, atom3, atom4, atom45, bond12, bond23, bond34, bond45 in paths: + if ((bond12.is_double() or bond12.is_triple()) and + (bond23.is_single() or bond23.is_double()) and + (bond34.is_double() or bond34.is_triple()) and + (bond45.is_single() or bond45.is_double())): + bond12.decrement_order() + bond23.increment_order() + bond34.decrement_order() + bond45.increment_order() + structure = mol.copy(deep=True) + bond12.increment_order() + bond23.decrement_order() + bond34.increment_order() + bond45.decrement_order() + try: + structure.update_atomtypes(log_species=False) + except AtomTypeError: + pass + else: + structures.append(structure) + return structures From 89d134a45c063cdf34dd4a4ab502e79a34ad6e30 Mon Sep 17 00:00:00 2001 From: bjkreitz Date: Wed, 6 Sep 2023 11:11:23 -0400 Subject: [PATCH 2/9] add resonance to get_thermo_for_surface_species --- rmgpy/data/thermo.py | 137 ++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 59 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 34f9630dac..ae00f34aa8 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1560,77 +1560,96 @@ def get_thermo_data_for_surface_species(self, species): Returns a :class:`ThermoData` object, with no Cp0 or CpInf """ + # define the comparison function to find the lowest energy + + def lowest_energy(species): + if hasattr(species.thermo, 'H298'): + return species.thermo.H298.value_si + else: + return species.thermo.get_enthalpy(298.0) + if species.is_surface_site(): raise DatabaseError("Can't estimate thermo of vacant site. Should be in library (and should be 0).") logging.debug("Trying to generate thermo for surface species using first of %d resonance isomer(s):", len(species.molecule)) - molecule = species.molecule[0] - # store any labeled atoms to reapply at the end - labeled_atoms = molecule.get_all_labeled_atoms() - molecule.clear_labeled_atoms() - logging.debug("Before removing from surface:\n" + molecule.to_adjacency_list()) - # only want/need to do one resonance structure, - # because will need to regenerate others in gas phase - dummy_molecules = molecule.get_desorbed_molecules() - for mol in dummy_molecules: - mol.clear_labeled_atoms() - if len(dummy_molecules) == 0: - raise RuntimeError(f"Cannot get thermo for gas-phase molecule. No valid dummy molecules from original molecule:\n{molecule.to_adjacency_list()}") + + resonance_data = [] + + # iterate over all resonance structures of the species + + for i in range(len(species.molecule)): + molecule = species.molecule[i] + # store any labeled atoms to reapply at the end + labeled_atoms = molecule.get_all_labeled_atoms() + molecule.clear_labeled_atoms() + logging.debug("Before removing from surface:\n" + molecule.to_adjacency_list()) + # get all desorbed molecules for a given adsorbate + dummy_molecules = molecule.get_desorbed_molecules() + for mol in dummy_molecules: + mol.clear_labeled_atoms() + if len(dummy_molecules) == 0: + raise RuntimeError(f"Cannot get thermo for gas-phase molecule. No valid dummy molecules from original molecule:\n{molecule.to_adjacency_list()}") - # if len(molecule) > 1, it will assume all resonance structures have already been generated when it tries to generate them, so evaluate each configuration separately and pick the lowest energy one by H298 value - gas_phase_species_from_libraries = [] - gas_phase_species_estimates = [] - for dummy_molecule in dummy_molecules: - dummy_species = Species() - dummy_species.molecule = [dummy_molecule] - dummy_species.generate_resonance_structures() - dummy_species.thermo = self.get_thermo_data(dummy_species) - if dummy_species.thermo.label: - gas_phase_species_from_libraries.append(dummy_species) - else: - gas_phase_species_estimates.append(dummy_species) + # if len(molecule) > 1, it will assume all resonance structures have already been generated when it tries to generate them, so evaluate each configuration separately and pick the lowest energy one by H298 value + gas_phase_species_from_libraries = [] + gas_phase_species_estimates = [] + for dummy_molecule in dummy_molecules: + dummy_species = Species() + dummy_species.molecule = [dummy_molecule] + dummy_species.generate_resonance_structures() + dummy_species.thermo = self.get_thermo_data(dummy_species) + if dummy_species.thermo.label: + gas_phase_species_from_libraries.append(dummy_species) + else: + gas_phase_species_estimates.append(dummy_species) - # define the comparison function to find the lowest energy - def lowest_energy(species): - if hasattr(species.thermo, 'H298'): - return species.thermo.H298.value_si + if gas_phase_species_from_libraries: + gas_phase_species = min(gas_phase_species_from_libraries, key=lowest_energy) else: - return species.thermo.get_enthalpy(298.0) + gas_phase_species = min(gas_phase_species_estimates, key=lowest_energy) - if gas_phase_species_from_libraries: - species = min(gas_phase_species_from_libraries, key=lowest_energy) - else: - species = min(gas_phase_species_estimates, key=lowest_energy) + thermo = gas_phase_species.thermo + thermo.comment = f"Gas phase thermo for {thermo.label or gas_phase_species.molecule[0].to_smiles()} from {thermo.comment}. Adsorption correction:" + logging.debug("Using thermo from gas phase for species {}\n".format(gas_phase_species.label) + repr(thermo)) - thermo = species.thermo - thermo.comment = f"Gas phase thermo for {thermo.label or species.molecule[0].to_smiles()} from {thermo.comment}. Adsorption correction:" - logging.debug("Using thermo from gas phase for species {}\n".format(species.label) + repr(thermo)) + if not isinstance(thermo, ThermoData): + thermo = thermo.to_thermo_data() + find_cp0_and_cpinf(gas_phase_species, thermo) - if not isinstance(thermo, ThermoData): - thermo = thermo.to_thermo_data() - find_cp0_and_cpinf(species, thermo) + # Get the adsorption energy + # Create the ThermoData object - # Get the adsorption energy - # Create the ThermoData object - adsorption_thermo = ThermoData( - Tdata=([300, 400, 500, 600, 800, 1000, 1500], "K"), - Cpdata=([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "J/(mol*K)"), - H298=(0.0, "kJ/mol"), - S298=(0.0, "J/(mol*K)"), - ) + adsorption_thermo = ThermoData( + Tdata=([300, 400, 500, 600, 800, 1000, 1500], "K"), + Cpdata=([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "J/(mol*K)"), + H298=(0.0, "kJ/mol"), + S298=(0.0, "J/(mol*K)"), + ) - surface_sites = molecule.get_surface_sites() - try: - self._add_adsorption_correction(adsorption_thermo, self.groups[self.adsorption_groups], molecule, surface_sites) - except (KeyError, DatabaseError): - logging.error("Couldn't find in adsorption thermo database:") - logging.error(molecule) - logging.error(molecule.to_adjacency_list()) - raise + surface_sites = molecule.get_surface_sites() + try: + self._add_adsorption_correction(adsorption_thermo, self.groups['adsorptionPt111'], molecule, surface_sites) + except (KeyError, DatabaseError): + logging.error("Couldn't find in adsorption thermo database:") + logging.error(molecule) + logging.error(molecule.to_adjacency_list()) + raise + + # (group_additivity=True means it appends the comments) + add_thermo_data(thermo, adsorption_thermo, group_additivity=True) + + resonance_data.append(thermo) + + # Get the enthalpy of formation of every adsorbate at 298 K and determine the resonance structure with the lowest enthalpy of formation + # We assume that the lowest enthalpy of formation is the correct thermodynamic property for the adsorbate - # (group_additivity=True means it appends the comments) - add_thermo_data(thermo, adsorption_thermo, group_additivity=True) + enthalpy298 = [] + + for i in range(len(resonance_data)): + enthalpy298.append(resonance_data[i].get_enthalpy(298)) + + thermo = resonance_data[np.argmin(enthalpy298)] if thermo.label: thermo.label += 'X' * len(surface_sites) @@ -2574,7 +2593,7 @@ def _add_group_thermo_data(self, thermo_data, database, molecule, atom): node = node0 while node is not None and node.data is None: node = node.parent - if node is None: + if node is None: raise DatabaseError(f'Unable to determine thermo parameters for atom {atom} in molecule {molecule}: ' f'no data for node {node0} or any of its ancestors in database {database.label}.\n' + molecule.to_adjacency_list()) @@ -2872,7 +2891,6 @@ def register_in_central_thermo_db(self, species): except ValueError: logging.info('Fail to generate inchi/smiles for species below:\n{0}'.format(species.to_adjacency_list())) - def find_cp0_and_cpinf(species, heat_capacity): """ Calculate the Cp0 and CpInf values, and add them to the HeatCapacityModel object. @@ -2883,3 +2901,4 @@ def find_cp0_and_cpinf(species, heat_capacity): if heat_capacity.CpInf is None: cp_inf = species.calculate_cpinf() heat_capacity.CpInf = (cp_inf, "J/(mol*K)") + From e16559173c9bd58a76cdcabba874d4c27c42bf4b Mon Sep 17 00:00:00 2001 From: bjkreitz Date: Wed, 6 Sep 2023 16:39:58 -0400 Subject: [PATCH 3/9] add tests and fix pathfinderTest --- test/rmgpy/molecule/moleculeTest.py | 20 +++++++++ test/rmgpy/molecule/pathfinderTest.py | 48 ++++++++++++++++++++-- test/rmgpy/molecule/resonanceTest.py | 58 +++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 4 deletions(-) diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index f9ee25000d..9621d6fc7d 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -2139,6 +2139,26 @@ def test_surface_molecules(self): assert adsorbed.number_of_surface_sites() == 1 assert bidentate.number_of_surface_sites() == 2 + def test_is_multidentate(self): + """ + Test that we can identify a multidentate adsorbate + """ + monodentate = Molecule().from_adjacency_list( + """ + 1 H u0 p0 c0 {2,S} + 2 X u0 p0 c0 {1,S} + """ + ) + adjlist = """ + 1 X u0 p0 c0 {3,S} + 2 X u0 p0 c0 {4,S} + 3 C u0 p0 c0 {1,S} {4,T} + 4 C u0 p0 c0 {2,S} {3,T} + """ + bidentate = Molecule().from_adjacency_list(adjlist) + assert not monodentate.is_multidentate() + assert bidentate.is_multidentate() + def test_malformed_augmented_inchi(self): """Test that augmented inchi without InChI layer raises Exception.""" malform_aug_inchi = "foo" diff --git a/test/rmgpy/molecule/pathfinderTest.py b/test/rmgpy/molecule/pathfinderTest.py index ef2dfbfa55..9949b3be80 100644 --- a/test/rmgpy/molecule/pathfinderTest.py +++ b/test/rmgpy/molecule/pathfinderTest.py @@ -41,6 +41,8 @@ find_lone_pair_multiple_bond_paths, find_N5dc_radical_delocalization_paths, find_shortest_path, + find_adsorbate_delocalization_paths, + find_adsorbate_conjugate_delocalization_paths, ) @@ -458,7 +460,7 @@ def test_h2nnoo(self): assert paths -class FindAdjLonePairRadicalDelocalizationPaths: +class FindAdjLonePairRadicalDelocalizationPathsTest: """ test the find_lone_pair_radical_delocalization_paths method """ @@ -490,7 +492,7 @@ def test_double_bond(self): assert paths -class FindAdjLonePairMultipleBondDelocalizationPaths: +class FindAdjLonePairMultipleBondDelocalizationPathsTest: """ test the find_lone_pair_multiple_bond_delocalization_paths method """ @@ -502,7 +504,7 @@ def test_sho3(self): assert paths -class FindAdjLonePairRadicalMultipleBondDelocalizationPaths: +class FindAdjLonePairRadicalMultipleBondDelocalizationPathsTest: """ test the find_lone_pair_radical_multiple_bond_delocalization_paths method """ @@ -520,7 +522,7 @@ def test_hso3(self): assert paths -class FindN5dcRadicalDelocalizationPaths: +class FindN5dcRadicalDelocalizationPathsTest: """ test the find_N5dc_radical_delocalization_paths method """ @@ -530,3 +532,41 @@ def test_hnnoo(self): mol = Molecule().from_smiles(smiles) paths = find_N5dc_radical_delocalization_paths(mol.atoms[1]) assert paths + +class FindAdsorbateDelocalizationPathsTest: + """ + test the find_adsorbate_delocalization_paths method + """ + + def test_cch(self): + adjlist = """ +1 X u0 p0 c0 {3,D} +2 X u0 p0 c0 {4,S} +3 C u0 p0 c0 {1,D} {4,D} +4 C u0 p0 c0 {2,S} {3,D} {5,S} +5 H u0 p0 c0 {4,S} + """ + + mol = Molecule().from_adjacency_list(adjlist) + paths = find_adsorbate_delocalization_paths(mol.atoms[0]) + assert paths + +class FindAdsorbateConjugateDelocalizationPathsTest: + """ + test the find_adsorbate_conjugate_delocalization_paths + """ + + def test_chchc(self): + adjlist = """ +1 X u0 p0 c0 {3,T} +2 X u0 p0 c0 {5,S} +3 C u0 p0 c0 {1,T} {4,S} +4 C u0 p0 c0 {3,S} {5,D} {6,S} +5 C u0 p0 c0 {2,S} {4,D} {7,S} +6 H u0 p0 c0 {4,S} +7 H u0 p0 c0 {5,S} + """ + + mol = Molecule().from_adjacency_list(adjlist) + paths = find_adsorbate_conjugate_delocalization_paths(mol.atoms[0]) + assert paths \ No newline at end of file diff --git a/test/rmgpy/molecule/resonanceTest.py b/test/rmgpy/molecule/resonanceTest.py index 2130307e25..19a8eb7612 100644 --- a/test/rmgpy/molecule/resonanceTest.py +++ b/test/rmgpy/molecule/resonanceTest.py @@ -1363,6 +1363,64 @@ def test_resonance_without_changing_atom_order2(self): atom2_nb = {nb.id for nb in list(atom2.bonds.keys())} assert atom1_nb == atom2_nb + def test_adsorbate_resonance_cc1(self): + """Test if all three resonance structures for X#CC#X are generated""" + adjlist = """ +1 X u0 p0 c0 {3,T} +2 X u0 p0 c0 {4,T} +3 C u0 p0 c0 {1,T} {4,S} +4 C u0 p0 c0 {2,T} {3,S} + """ + + mol = Molecule().from_adjacency_list(adjlist) + + mol_list = generate_resonance_structures(mol) + assert len(mol_list) == 3 + + def test_adsorbate_resonance_cc2(self): + """Test if all three resonance structures for X=C=C=X are generated""" + adjlist = """ +1 X u0 p0 c0 {3,D} +2 X u0 p0 c0 {4,D} +3 C u0 p0 c0 {1,D} {4,D} +4 C u0 p0 c0 {2,D} {3,D} + """ + + mol = Molecule().from_adjacency_list(adjlist) + + mol_list = generate_resonance_structures(mol) + assert len(mol_list) == 3 + + def test_adsorbate_resonance_cc3(self): + """Test if all three resonance structures for XC#CX are generated""" + adjlist = """ +1 X u0 p0 c0 {3,S} +2 X u0 p0 c0 {4,S} +3 C u0 p0 c0 {1,S} {4,T} +4 C u0 p0 c0 {2,S} {3,T} + """ + + mol = Molecule().from_adjacency_list(adjlist) + + mol_list = generate_resonance_structures(mol) + assert len(mol_list) == 3 + + def test_adsorbate_resonance_chchc(self): + """Test if both resonance structures for XCHCHXC are generated""" + adjlist = """ +1 X u0 p0 c0 {3,T} +2 X u0 p0 c0 {5,S} +3 C u0 p0 c0 {1,T} {4,S} +4 C u0 p0 c0 {3,S} {5,D} {6,S} +5 C u0 p0 c0 {2,S} {4,D} {7,S} +6 H u0 p0 c0 {4,S} +7 H u0 p0 c0 {5,S} + """ + + mol = Molecule().from_adjacency_list(adjlist) + + mol_list = generate_resonance_structures(mol) + assert len(mol_list) == 2 class ClarTest: """ From 2e3fd4ed965453055b54e3f8ab85bddb3e3ae10b Mon Sep 17 00:00:00 2001 From: Bjarne Kreitz Date: Mon, 25 Mar 2024 14:15:53 -0400 Subject: [PATCH 4/9] include N --- rmgpy/molecule/pathfinder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rmgpy/molecule/pathfinder.py b/rmgpy/molecule/pathfinder.py index ffaca23861..8bce591a4d 100644 --- a/rmgpy/molecule/pathfinder.py +++ b/rmgpy/molecule/pathfinder.py @@ -499,9 +499,9 @@ def find_adsorbate_delocalization_paths(atom1): paths = [] if atom1.is_surface_site(): for atom2, bond12 in atom1.edges.items(): - if atom2.is_carbon(): + if atom2.is_carbon() or atom2.is_nitrogen(): for atom3, bond23 in atom2.edges.items(): - if atom3.is_carbon(): + if atom3.is_carbon() or atom3.is_nitrogen(): for atom4, bond34 in atom3.edges.items(): if atom4.is_surface_site(): paths.append([atom1, atom2, atom3, atom4, bond12, bond23, bond34]) @@ -526,11 +526,11 @@ def find_adsorbate_conjugate_delocalization_paths(atom1): paths = [] if atom1.is_surface_site(): for atom2, bond12 in atom1.edges.items(): - if atom2.is_carbon(): + if atom2.is_carbon() or atom2.is_nitrogen(): for atom3, bond23 in atom2.edges.items(): - if atom3.is_carbon(): + if atom3.is_carbon() or atom3.is_nitrogen(): for atom4, bond34 in atom3.edges.items(): - if atom2 is not atom4 and atom4.is_carbon(): + if atom2 is not atom4 and (atom4.is_carbon() or atom4.is_nitrogen()): for atom5, bond45 in atom4.edges.items(): if atom5.is_surface_site(): paths.append([atom1, atom2, atom3, atom4, atom5, bond12, bond23, bond34, bond45]) From 37b31ebce1b644e6c3f83e90c80ea50ff969f881 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 12 Nov 2024 23:50:01 -0500 Subject: [PATCH 5/9] Restore the atom labels to every molecule. I think this part that puts the atom labels back should probably be inside the loop where we took them off so that all the molecules get their labels back. --- rmgpy/data/thermo.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index ae00f34aa8..079aacd3f3 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1640,6 +1640,13 @@ def lowest_energy(species): add_thermo_data(thermo, adsorption_thermo, group_additivity=True) resonance_data.append(thermo) + # if the molecule had labels, reapply them + for label, atom in labeled_atoms.items(): + if isinstance(atom,list): + for a in atom: + a.label = label + else: + atom.label = label # Get the enthalpy of formation of every adsorbate at 298 K and determine the resonance structure with the lowest enthalpy of formation # We assume that the lowest enthalpy of formation is the correct thermodynamic property for the adsorbate @@ -1656,14 +1663,6 @@ def lowest_energy(species): find_cp0_and_cpinf(species, thermo) - # if the molecule had labels, reapply them - for label,atom in labeled_atoms.items(): - if isinstance(atom,list): - for a in atom: - a.label = label - else: - atom.label = label - return thermo def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molecule, surface_sites): From cc33d8940ef58f9ee1d6d30174f33dba15631902 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 12 Nov 2024 23:54:49 -0500 Subject: [PATCH 6/9] Estimating thermo of an adsorbate Species now re-orders Species.molecule. We do this in the gas phase, so now also adsorbates: If you have a Species with multiple Molecules (resonance structures) then the list of Molecules gets re-ordered when you estimate the thermo so that the most stable (lowest H298) goes first. This will then be used for drawing pictures, printing SMILES strings, etc. which might mean it's no the one that you started with. This may surprise some people in some situations, but I think is the way the gas phase works, and it has some benefits, especially for machine-generated species. One thing we don't yet do in adsorbates is prioritize resonance structures coming from thermo libraries, over those that are merely estimated. Not sure of the consequences of this. Other changes in this commit are mostly for clarity. --- rmgpy/data/thermo.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 079aacd3f3..2b14b131f6 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1561,8 +1561,7 @@ def get_thermo_data_for_surface_species(self, species): """ # define the comparison function to find the lowest energy - - def lowest_energy(species): + def species_enthalpy(species): if hasattr(species.thermo, 'H298'): return species.thermo.H298.value_si else: @@ -1577,9 +1576,7 @@ def lowest_energy(species): resonance_data = [] # iterate over all resonance structures of the species - - for i in range(len(species.molecule)): - molecule = species.molecule[i] + for molecule in species.molecule: # store any labeled atoms to reapply at the end labeled_atoms = molecule.get_all_labeled_atoms() molecule.clear_labeled_atoms() @@ -1591,7 +1588,10 @@ def lowest_energy(species): if len(dummy_molecules) == 0: raise RuntimeError(f"Cannot get thermo for gas-phase molecule. No valid dummy molecules from original molecule:\n{molecule.to_adjacency_list()}") - # if len(molecule) > 1, it will assume all resonance structures have already been generated when it tries to generate them, so evaluate each configuration separately and pick the lowest energy one by H298 value + # if len(molecule) > 1, it will assume all resonance structures + # have already been generated when it tries to generate them, so + # evaluate each configuration separately and pick the lowest energy + # one by H298 value gas_phase_species_from_libraries = [] gas_phase_species_estimates = [] for dummy_molecule in dummy_molecules: @@ -1605,13 +1605,13 @@ def lowest_energy(species): gas_phase_species_estimates.append(dummy_species) if gas_phase_species_from_libraries: - gas_phase_species = min(gas_phase_species_from_libraries, key=lowest_energy) + gas_phase_species = min(gas_phase_species_from_libraries, key=species_enthalpy) else: - gas_phase_species = min(gas_phase_species_estimates, key=lowest_energy) + gas_phase_species = min(gas_phase_species_estimates, key=species_enthalpy) thermo = gas_phase_species.thermo thermo.comment = f"Gas phase thermo for {thermo.label or gas_phase_species.molecule[0].to_smiles()} from {thermo.comment}. Adsorption correction:" - logging.debug("Using thermo from gas phase for species {}\n".format(gas_phase_species.label) + repr(thermo)) + logging.debug("Using thermo from gas phase for species %s: %r", gas_phase_species.label, thermo) if not isinstance(thermo, ThermoData): thermo = thermo.to_thermo_data() @@ -1619,7 +1619,6 @@ def lowest_energy(species): # Get the adsorption energy # Create the ThermoData object - adsorption_thermo = ThermoData( Tdata=([300, 400, 500, 600, 800, 1000, 1500], "K"), Cpdata=([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "J/(mol*K)"), @@ -1639,7 +1638,6 @@ def lowest_energy(species): # (group_additivity=True means it appends the comments) add_thermo_data(thermo, adsorption_thermo, group_additivity=True) - resonance_data.append(thermo) # if the molecule had labels, reapply them for label, atom in labeled_atoms.items(): if isinstance(atom,list): @@ -1648,15 +1646,21 @@ def lowest_energy(species): else: atom.label = label - # Get the enthalpy of formation of every adsorbate at 298 K and determine the resonance structure with the lowest enthalpy of formation - # We assume that the lowest enthalpy of formation is the correct thermodynamic property for the adsorbate + # a tuple of the H298, the ThermoData object, and the molecule + resonance_data.append((thermo.get_enthalpy(298), thermo, molecule)) + + # Get the enthalpy of formation of every adsorbate at 298 K and + # determine the resonance structure with the lowest enthalpy of formation. + # We assume that the lowest enthalpy of formation is the correct + # thermodynamic property for the adsorbate. - enthalpy298 = [] + # first element of each tuple is H298, so sort by that + resonance_data = sorted(resonance_data) - for i in range(len(resonance_data)): - enthalpy298.append(resonance_data[i].get_enthalpy(298)) + # reorder the resonance structures (molecules) by their H298 + species.molecule = [mol for h, thermo, mol in resonance_data] - thermo = resonance_data[np.argmin(enthalpy298)] + thermo = resonance_data[0][1] if thermo.label: thermo.label += 'X' * len(surface_sites) From 20843b0bd089fb7a8aaf8964b01a6badd0f97efa Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 13 Nov 2024 08:31:34 -0500 Subject: [PATCH 7/9] Fix bug in sorting molecules. If two resonance structures had the same H298 then it would look to the second element of the tuples to sort and crash. This should avoid that (hopefully). --- rmgpy/data/thermo.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 2b14b131f6..f354636c26 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -1646,19 +1646,18 @@ def species_enthalpy(species): else: atom.label = label - # a tuple of the H298, the ThermoData object, and the molecule - resonance_data.append((thermo.get_enthalpy(298), thermo, molecule)) + # a tuple of molecule and its thermo + resonance_data.append((molecule, thermo)) # Get the enthalpy of formation of every adsorbate at 298 K and # determine the resonance structure with the lowest enthalpy of formation. # We assume that the lowest enthalpy of formation is the correct - # thermodynamic property for the adsorbate. + # thermodynamic property for the adsorbate, and the preferred representation. - # first element of each tuple is H298, so sort by that - resonance_data = sorted(resonance_data) + resonance_data = sorted(resonance_data, key=lambda x: x[1].H298.value_si) # reorder the resonance structures (molecules) by their H298 - species.molecule = [mol for h, thermo, mol in resonance_data] + species.molecule = [mol for mol, thermo in resonance_data] thermo = resonance_data[0][1] From e7f0f121d05554abb7f6c441b00f61914ee4e114 Mon Sep 17 00:00:00 2001 From: Bjarne Kreitz Date: Wed, 4 Dec 2024 22:53:53 -0500 Subject: [PATCH 8/9] add is_double_or_triple function to Bond class update adsorbate_shift_up_resonance_structures with double_or_triple function update adsorbate_conjugate_resonance_structure with double_or_triple function --- rmgpy/molecule/molecule.py | 7 +++++++ rmgpy/molecule/resonance.py | 33 +++++++++++++++++---------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 09b8cd43ce..5e02476550 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -883,6 +883,13 @@ def is_quadruple(self): """ return self.is_order(4) + def is_double_or_triple(self): + """ + Return ``True`` if the bond represents a double or triple bond or ``False`` + if not. + """ + return self.is_order(2) or self.is_order(3) + def is_benzene(self): """ Return ``True`` if the bond represents a benzene bond or ``False`` if diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index be0456f519..0cca99734c 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -1180,20 +1180,21 @@ def generate_adsorbate_shift_up_resonance_structures(mol): for atom in mol.vertices: paths = pathfinder.find_adsorbate_delocalization_paths(atom) for atom1, atom2, atom3, atom4, bond12, bond23, bond34 in paths: - if (bond12.is_double() and bond23.is_single() and bond34.is_double()) or (bond23.is_double() and bond12.is_double() and bond34.is_double()) or (bond12.is_triple() and bond23.is_single() and bond34.is_triple()) or (bond12.is_triple() and bond23.is_single() and bond34.is_double()): - bond12.decrement_order() - bond23.increment_order() - bond34.decrement_order() - structure = mol.copy(deep=True) - bond12.increment_order() - bond23.decrement_order() - bond34.increment_order() - try: - structure.update_atomtypes(log_species=False) - except AtomTypeError: - pass - else: - structures.append(structure) + if ((bond12.is_double_or_triple() and bond23.is_single() and bond34.is_double_or_triple()) or + (bond12.is_double() and bond23.is_double() and bond34.is_double())): + bond12.decrement_order() + bond23.increment_order() + bond34.decrement_order() + structure = mol.copy(deep=True) + bond12.increment_order() + bond23.decrement_order() + bond34.increment_order() + try: + structure.update_atomtypes(log_species=False) + except AtomTypeError: + pass + else: + structures.append(structure) return structures @@ -1215,9 +1216,9 @@ def generate_adsorbate_conjugate_resonance_structures(mol): for atom in mol.vertices: paths = pathfinder.find_adsorbate_conjugate_delocalization_paths(atom) for atom1, atom2, atom3, atom4, atom45, bond12, bond23, bond34, bond45 in paths: - if ((bond12.is_double() or bond12.is_triple()) and + if (bond12.is_double_or_triple() and (bond23.is_single() or bond23.is_double()) and - (bond34.is_double() or bond34.is_triple()) and + bond34.is_double_or_triple() and (bond45.is_single() or bond45.is_double())): bond12.decrement_order() bond23.increment_order() From 2ae7a9bf0f9920469674c3eae7fac28b51faf594 Mon Sep 17 00:00:00 2001 From: Bjarne Kreitz Date: Fri, 13 Dec 2024 14:19:27 -0500 Subject: [PATCH 9/9] improve pathfinderTest with more tests --- test/rmgpy/molecule/pathfinderTest.py | 45 +++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/test/rmgpy/molecule/pathfinderTest.py b/test/rmgpy/molecule/pathfinderTest.py index 9949b3be80..179e9185fe 100644 --- a/test/rmgpy/molecule/pathfinderTest.py +++ b/test/rmgpy/molecule/pathfinderTest.py @@ -538,7 +538,7 @@ class FindAdsorbateDelocalizationPathsTest: test the find_adsorbate_delocalization_paths method """ - def test_cch(self): + def test_xcxch(self): adjlist = """ 1 X u0 p0 c0 {3,D} 2 X u0 p0 c0 {4,S} @@ -549,14 +549,33 @@ def test_cch(self): mol = Molecule().from_adjacency_list(adjlist) paths = find_adsorbate_delocalization_paths(mol.atoms[0]) + no_surface_sites = sum(atom.is_surface_site() for atom in paths[0][:4]) assert paths + assert no_surface_sites == 2 + + def test_xcxch2(self): + adjlist = """ +1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} +2 C u0 p0 c0 {1,S} {6,T} +3 H u0 p0 c0 {1,S} +4 H u0 p0 c0 {1,S} +5 X u0 p0 c0 {1,S} +6 X u0 p0 c0 {2,T} + """ + + mol = Molecule().from_adjacency_list(adjlist) + paths = find_adsorbate_delocalization_paths(mol.atoms[0]) + assert len(paths) == 0 + + + class FindAdsorbateConjugateDelocalizationPathsTest: """ test the find_adsorbate_conjugate_delocalization_paths """ - def test_chchc(self): + def test_xchchxc(self): adjlist = """ 1 X u0 p0 c0 {3,T} 2 X u0 p0 c0 {5,S} @@ -569,4 +588,24 @@ def test_chchc(self): mol = Molecule().from_adjacency_list(adjlist) paths = find_adsorbate_conjugate_delocalization_paths(mol.atoms[0]) - assert paths \ No newline at end of file + no_surface_sites = sum(atom.is_surface_site() for atom in paths[0][:5]) + assert paths + assert no_surface_sites == 2 + + def test_xchch2xch2(self): + adjlist = """ +1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} +2 C u0 p0 c0 {1,S} {6,S} {7,S} {9,S} +3 C u0 p0 c0 {1,S} {8,S} {10,D} +4 H u0 p0 c0 {1,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {2,S} +7 H u0 p0 c0 {2,S} +8 H u0 p0 c0 {3,S} +9 X u0 p0 c0 {2,S} +10 X u0 p0 c0 {3,D} + """ + + mol = Molecule().from_adjacency_list(adjlist) + paths = find_adsorbate_conjugate_delocalization_paths(mol.atoms[0]) + assert len(paths) == 0