From e830a81eded2fd04f820759466d9daf5aa5ea320 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 3 Feb 2021 21:48:14 +0200 Subject: [PATCH 1/6] Improve Species multiplicity handling --- arc/species/species.py | 96 ++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/arc/species/species.py b/arc/species/species.py index ff66d8094b..f2f6b203ac 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -342,6 +342,7 @@ def __init__(self, if species_dict is None or self.yml_path is not None: # Not reading from a dictionary + print('OK 1') self.force_field = force_field self.is_ts = is_ts self.ts_conf_spawned = False @@ -1219,45 +1220,68 @@ def determine_multiplicity(self, adjlist: str, mol: Optional[Molecule]): """ - Determine the spin multiplicity of the species + Determine the spin multiplicity of the species. + + Args: + smiles (str): The SMILES descriptor . + adjlist (str): The adjacency list descriptor. + mol (Molecule): The respective RMG Molecule object. """ - get_mul_from_xyz = True if self.charge != 0 else False - if not get_mul_from_xyz: - if mol is not None and mol.multiplicity >= 1: - self.multiplicity = mol.multiplicity - elif adjlist: - mol = Molecule().from_adjacency_list(adjlist, raise_atomtype_exception=False, - raise_charge_exception=False) - self.multiplicity = mol.multiplicity - elif self.mol is not None and self.mol.multiplicity >= 1: - self.multiplicity = self.mol.multiplicity - elif smiles: - mol = Molecule(smiles=smiles) - self.multiplicity = mol.multiplicity - if self.multiplicity is None or self.multiplicity < 1: - get_mul_from_xyz = True - if get_mul_from_xyz: - xyz = self.get_xyz() - if xyz is None and len(self.conformers): - xyz = self.conformers[0] - if xyz: - electrons = 0 - for symbol in xyz['symbols']: - for number, symb in symbol_by_number.items(): - if symbol == symb: - electrons += number - break - else: - raise SpeciesError(f'Could not identify atom symbol {symbol}') - electrons -= self.charge - if electrons % 2 == 1: - self.multiplicity = 2 - logger.warning(f'\nMultiplicity not specified for {self.label}, assuming a value of 2') - else: - self.multiplicity = 1 - logger.warning(f'\nMultiplicity not specified for {self.label}, assuming a value of 1') + if self.charge == 0: + self.determine_multiplicity_from_descriptors(smiles=smiles, adjlist=adjlist, mol=mol) + if self.multiplicity is None or self.multiplicity < 1: + self.determine_multiplicity_from_xyz() if self.multiplicity is None: raise SpeciesError(f'Could not determine multiplicity for species {self.label}') + print(self, self.multiplicity) + + def determine_multiplicity_from_descriptors(self, + smiles: str, + adjlist: str, + mol: Optional[Molecule]): + """ + Determine the spin multiplicity of the species from the chemical descriptors. + + Args: + smiles (str): The SMILES descriptor . + adjlist (str): The adjacency list descriptor. + mol (Molecule): The respective RMG Molecule object. + """ + if mol is not None and mol.multiplicity >= 1: + self.multiplicity = mol.multiplicity + elif adjlist: + mol = Molecule().from_adjacency_list(adjlist, raise_atomtype_exception=False, + raise_charge_exception=False) + self.multiplicity = mol.multiplicity + elif self.mol is not None and self.mol.multiplicity >= 1: + self.multiplicity = self.mol.multiplicity + elif smiles: + mol = Molecule(smiles=smiles) + self.multiplicity = mol.multiplicity + + def determine_multiplicity_from_xyz(self): + """ + Determine the spin multiplicity of the species from the xyz. + """ + xyz = self.get_xyz() + if xyz is None and len(self.conformers): + xyz = self.conformers[0] + if xyz: + electrons = 0 + for symbol in xyz['symbols']: + for number, symb in symbol_by_number.items(): + if symbol == symb: + electrons += number + break + else: + raise SpeciesError(f'Could not identify atom symbol {symbol}') + electrons -= self.charge + if electrons % 2 == 1: + self.multiplicity = 2 + logger.warning(f'\nMultiplicity not specified for {self.label}, assuming a value of 2') + else: + self.multiplicity = 1 + logger.warning(f'\nMultiplicity not specified for {self.label}, assuming a value of 1') def make_ts_report(self): """A helper function to write content into the .ts_report attribute""" From 25cde09ede105bb3ab428e14815d2727478db4c5 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 3 Feb 2021 21:48:33 +0200 Subject: [PATCH 2/6] Tests: Species multiplicity --- arc/species/speciesTest.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arc/species/speciesTest.py b/arc/species/speciesTest.py index 25abf8d768..4c4a8a234e 100644 --- a/arc/species/speciesTest.py +++ b/arc/species/speciesTest.py @@ -1252,6 +1252,20 @@ def test_net_charged_species(self): cation_rad.generate_conformers() self.assertTrue(len(cation_rad.conformers)) + def test_determine_multiplicity(self): + """Test determining a species multiplicity""" + h_rad = ARCSpecies(label='H', smiles='[H]') + self.assertEqual(h_rad.multiplicity, 2) + + n2 = ARCSpecies(label='N#N', smiles='N#N') + self.assertEqual(n2.multiplicity, 1) + + methyl_peroxyl = ARCSpecies(label='CH3OO', smiles='CO[O]') + self.assertEqual(methyl_peroxyl.multiplicity, 2) + + ch_ts = ARCSpecies(label='C--H-TS', xyz='C 0 0 0\nH 1 2 5', is_ts=True) + self.assertEqual(ch_ts.multiplicity, 2) + def test_are_coords_compliant_with_graph(self): """Test coordinates compliant with 2D graph connectivity""" self.assertTrue(are_coords_compliant_with_graph(xyz=self.spc6.get_xyz(), mol=self.spc6.mol)) From 8de7736f8d22663528a210ba12b96398b40c3581 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 3 Feb 2021 21:49:08 +0200 Subject: [PATCH 3/6] Improve Reaction multiplicity handling --- arc/reaction.py | 169 +++++++++++++++++++----------------------------- 1 file changed, 68 insertions(+), 101 deletions(-) diff --git a/arc/reaction.py b/arc/reaction.py index 375272958b..5b33d999c7 100644 --- a/arc/reaction.py +++ b/arc/reaction.py @@ -121,8 +121,8 @@ def __init__(self, self.multiplicity = multiplicity self.charge = charge if self.multiplicity is not None and not isinstance(self.multiplicity, int): - raise InputError('Reaction multiplicity must be an integer, got {0} of type {1}.'.format( - self.multiplicity, type(self.multiplicity))) + raise InputError(f'Reaction multiplicity must be an integer, ' + f'got {self.multiplicity} which is a {type(self.multiplicity)}.') self.reactants = reactants self.products = products self.rmg_reaction = rmg_reaction @@ -310,115 +310,65 @@ def arc_species_from_rmg_reaction(self): def determine_rxn_multiplicity(self): """A helper function for determining the surface multiplicity""" + r_species, p_species = self.get_comprehensive_species() if self.multiplicity is None: ordered_r_mult_list, ordered_p_mult_list = list(), list() - if len(self.r_species): - if len(self.r_species) == 1: - self.multiplicity = self.r_species[0].multiplicity - elif len(self.r_species) == 2: - ordered_r_mult_list = sorted([self.r_species[0].multiplicity, - self.r_species[1].multiplicity]) - elif len(self.r_species) == 3: - ordered_r_mult_list = sorted([self.r_species[0].multiplicity, - self.r_species[1].multiplicity, - self.r_species[2].multiplicity]) - if len(self.p_species) == 1: - self.multiplicity = self.p_species[0].multiplicity - elif len(self.p_species) == 2: - ordered_p_mult_list = sorted([self.p_species[0].multiplicity, - self.p_species[1].multiplicity]) - elif len(self.p_species) == 3: - ordered_p_mult_list = sorted([self.p_species[0].multiplicity, - self.p_species[1].multiplicity, - self.p_species[2].multiplicity]) + if len(r_species): + if len(r_species) == 1: + self.multiplicity = r_species[0].multiplicity + else: + ordered_r_mult_list = sorted([r_spc.multiplicity for r_spc in r_species]) + if len(p_species) == 1: + self.multiplicity = p_species[0].multiplicity + else: + ordered_p_mult_list = sorted([p_spc.multiplicity for p_spc in p_species]) + elif self.rmg_reaction is not None: if len(self.rmg_reaction.reactants) == 1: self.multiplicity = self.rmg_reaction.reactants[0].molecule[0].multiplicity - elif len(self.rmg_reaction.reactants) == 2: - ordered_r_mult_list = sorted([self.rmg_reaction.reactants[0].molecule[0].multiplicity, - self.rmg_reaction.reactants[1].molecule[0].multiplicity]) - elif len(self.rmg_reaction.reactants) == 3: - ordered_r_mult_list = sorted([self.rmg_reaction.reactants[0].molecule[0].multiplicity, - self.rmg_reaction.reactants[1].molecule[0].multiplicity, - self.rmg_reaction.reactants[2].molecule[0].multiplicity]) + else: + ordered_r_mult_list = sorted([r_spc.molecule[0].multiplicity for r_spc in self.rmg_reaction.reactants]) if len(self.rmg_reaction.products) == 1: self.multiplicity = self.rmg_reaction.products[0].molecule[0].multiplicity - elif len(self.rmg_reaction.products) == 2: - ordered_p_mult_list = sorted([self.rmg_reaction.products[0].molecule[0].multiplicity, - self.rmg_reaction.products[1].molecule[0].multiplicity]) - elif len(self.rmg_reaction.products) == 3: - ordered_p_mult_list = sorted([self.rmg_reaction.products[0].molecule[0].multiplicity, - self.rmg_reaction.products[1].molecule[0].multiplicity, - self.rmg_reaction.products[2].molecule[0].multiplicity]) + else: + ordered_p_mult_list = sorted([p_spc.molecule[0].multiplicity for p_spc in self.rmg_reaction.products]) if self.multiplicity is None: - if ordered_r_mult_list == [1, 1]: - self.multiplicity = 1 # S + S = D - elif ordered_r_mult_list == [1, 2]: - self.multiplicity = 2 # S + D = D - elif ordered_r_mult_list == [2, 2]: - # D + D = S or T - if ordered_p_mult_list in [[1, 1], [1, 1, 1]]: - self.multiplicity = 1 - elif ordered_p_mult_list in [[1, 3], [1, 1, 3]]: - self.multiplicity = 3 - else: - self.multiplicity = 1 - logger.warning(f'ASSUMING a multiplicity of 1 (singlet) for reaction {self.label}') - elif ordered_r_mult_list == [1, 3]: - self.multiplicity = 3 # S + T = T - elif ordered_r_mult_list == [2, 3]: - # D + T = D or Q - if ordered_p_mult_list in [[1, 2], [1, 1, 2]]: - self.multiplicity = 2 - elif ordered_p_mult_list in [[1, 4], [1, 1, 4]]: - self.multiplicity = 4 - else: - self.multiplicity = 2 - logger.warning(f'ASSUMING a multiplicity of 2 (doublet) for reaction {self.label}') - elif ordered_r_mult_list == [3, 3]: - # T + T = S or T or quintet - if ordered_p_mult_list in [[1, 1], [1, 1, 1]]: - self.multiplicity = 1 - elif ordered_p_mult_list in [[1, 3], [1, 1, 3]]: - self.multiplicity = 3 - elif ordered_p_mult_list in [[1, 5], [1, 1, 5]]: - self.multiplicity = 5 - else: - self.multiplicity = 3 - logger.warning(f'ASSUMING a multiplicity of 3 (triplet) for reaction {self.label}') - elif ordered_r_mult_list == [1, 1, 1]: - self.multiplicity = 1 # S + S + S = S - elif ordered_r_mult_list == [1, 1, 2]: - self.multiplicity = 2 # S + S + D = D - elif ordered_r_mult_list == [1, 1, 3]: - self.multiplicity = 3 # S + S + T = T - elif ordered_r_mult_list == [1, 2, 2]: - # S + D + D = S or T - if ordered_p_mult_list in [[1, 1], [1, 1, 1]]: - self.multiplicity = 1 - elif ordered_p_mult_list in [[1, 3], [1, 1, 3]]: - self.multiplicity = 3 - else: - self.multiplicity = 1 - logger.warning(f'ASSUMING a multiplicity of 1 (singlet) for reaction {self.label}') - elif ordered_r_mult_list == [2, 2, 2]: - # D + D + D = D or Q - if ordered_p_mult_list in [[1, 2], [1, 1, 2]]: - self.multiplicity = 2 - elif ordered_p_mult_list in [[1, 4], [1, 1, 4]]: - self.multiplicity = 4 - else: + for list_1, list_2 in [(ordered_r_mult_list, ordered_p_mult_list), + (ordered_p_mult_list, ordered_r_mult_list)]: + if all(m == 1 for m in list_1): + self.multiplicity = 1 # S + S = S + break + if 2 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(2)): + self.multiplicity = 2 # S + D = D + break + if 3 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(3)): + self.multiplicity = 3 # S + T = T + break + if 4 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(4)): + self.multiplicity = 4 # S + Q = Q + break + if all(m == 2 for m in list_1): + # D + D = S or T + # D + D + D = D or Q + if len(list_1) % 2 == 0: # even number of D's in list_1, m must be an odd number + if any(m > 2 for m in list_2): + self.multiplicity = max(list_2) if max(list_2) % 2 == 1 else max(list_2) - 1 + else: # odd number of D's in list_1, m must be even + self.multiplicity = max(list_2) if max(list_2) % 2 == 0 else max(list_2) - 1 + if all(m == 3 for m in list_1): + # T + T = S or P + # T + T + T = T or 7 + if len(list_1) % 2 == 0: # even number of T's in list_1, m must be 1 or 5 + self.multiplicity = 1 + logger.warning(f'ASSUMING a multiplicity of 1 (singlet) for reaction {self.label}') + else: # odd number of D's in list_1, m must be 3 or 7 + self.multiplicity = 3 + logger.warning(f'ASSUMING a multiplicity of 3 (triplet) for reaction {self.label}') + if list_1 == [2, 3] and 4 not in list_2: + # D + T = D or Q self.multiplicity = 2 logger.warning(f'ASSUMING a multiplicity of 2 (doublet) for reaction {self.label}') - elif ordered_r_mult_list == [1, 2, 3]: - # S + D + T = D or Q - if ordered_p_mult_list in [[1, 2], [1, 1, 2]]: - self.multiplicity = 2 - elif ordered_p_mult_list in [[1, 4], [1, 1, 4]]: - self.multiplicity = 4 - self.multiplicity = 2 - logger.warning(f'ASSUMING a multiplicity of 2 (doublet) for reaction {self.label}') - else: + if self.multiplicity is None: raise ReactionError(f'Could not determine multiplicity for reaction {self.label}') logger.info(f'Setting multiplicity of reaction {self.label} to {self.multiplicity}') @@ -632,6 +582,23 @@ def get_species_count(self, well_str.endswith(f' {species.label}') return count + def get_comprehensive_species(self): + """ + Get a list of all reactants and a list of all products, including duplicate species. + For example, for self recombination of a radical (R1 + R1), self.r_species will be just [R1], + while this method will return [R1, R1]. + + Returns: + Tuple[List[ARCSpecies], List[ARCSpecies]]: + The comprehensive reactants and products species lists, respectively. + """ + r_species, p_species = list(), list() + for spc in self.r_species: + r_species.extend([spc] * self.get_species_count(species=spc, well=0)) + for spc in self.p_species: + p_species.extend([spc] * self.get_species_count(species=spc, well=1)) + return r_species, p_species + def get_atom_map(self, verbose: int = 0) -> Optional[List[int]]: """ Get the atom mapping of the reactant atoms to the product atoms. From 4d6b0c96d665b9be3080bb8978df1805b629cb7d Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 3 Feb 2021 21:49:18 +0200 Subject: [PATCH 4/6] Tests: Reaction multiplicity --- arc/reactionTest.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arc/reactionTest.py b/arc/reactionTest.py index 3f7f136aa8..f0af43e257 100644 --- a/arc/reactionTest.py +++ b/arc/reactionTest.py @@ -46,6 +46,10 @@ def setUpClass(cls): cls.rxn4 = ARCReaction(reactants=['[NH2]', 'N[NH]'], products=['N', 'N[N]']) cls.rxn4.rmg_reaction = Reaction(reactants=[Species().from_smiles('[NH2]'), Species().from_smiles('N[NH]')], products=[Species().from_smiles('N'), Species().from_smiles('N[N]')]) + cls.rxn5 = ARCReaction(reactants=['CO[O]', 'CO[O]'], products=['C=O', 'CO', '[O][O]']) + cls.rxn5.rmg_reaction = Reaction(reactants=[Species().from_smiles('CO[O]'), Species().from_smiles('CO[O]')], + products=[Species().from_smiles('C=O'), Species().from_smiles('CO'), + Species().from_smiles('[O][O]')]) def test_str(self): """Test the string representation of the object""" @@ -125,6 +129,9 @@ def test_determine_multiplicity(self): self.rxn4.determine_rxn_multiplicity() self.assertEqual(self.rxn4.multiplicity, 3) + self.rxn5.determine_rxn_multiplicity() + self.assertEqual(self.rxn5.multiplicity, 3) + def test_check_atom_balance(self): """Test the Reaction check_atom_balance method""" @@ -165,6 +172,17 @@ def test_get_species_count(self): self.assertEqual(rxn1.get_species_count(species=spc2, well=0), 1) self.assertEqual(rxn1.get_species_count(species=spc2, well=1), 2) + def test_get_comprehensive_species(self): + """Test identifying duplicate species in reactants/products""" + rxn1 = ARCReaction(label='methylperoxyl + methylperoxyl <=> methanol + formaldehyde + O2') + rxn1.r_species = [ARCSpecies(label='methylperoxyl', smiles='CO[O]')] + rxn1.p_species = [ARCSpecies(label='methanol', smiles='CO'), + ARCSpecies(label='formaldehyde', smiles='C=O'), + ARCSpecies(label='O2', smiles='[O][O]')] + r_species, p_species = rxn1.get_comprehensive_species() + self.assertEqual(len(r_species), 2) + self.assertEqual(len(p_species), 3) + def test_get_atom_map(self): """Test getting an atom map for a reaction""" @@ -988,9 +1006,6 @@ def test_get_mapped_product_xyz(self): self.assertTrue(mapped_product.get_xyz(), h2o_xyz_1) - - - def check_atom_map(rxn: ARCReaction) -> bool: """ A helper function for testing a reaction atom map. From a7cbaa8049fa67482aa234a0d22c1689ebea13e5 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 3 Feb 2021 22:05:15 +0200 Subject: [PATCH 5/6] Minor: Typo in common docstring --- arc/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arc/common.py b/arc/common.py index 7dde390b4d..2bbc8278c6 100644 --- a/arc/common.py +++ b/arc/common.py @@ -958,7 +958,7 @@ def check_torsion_change(torsions: pd.DataFrame, differences are equal to the scan resolution. Returns: pd.DataFrame - a DataFrame consisting of ``True``/``False``, indicating + A DataFrame consisting of ``True``/``False``, indicating which torsions changed significantly. ``True`` for significant change. """ # First iteration without 180/-180 adjustment From cba1a6b8dbc4682f157ddb9390f12a24b3b5b0d4 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Thu, 4 Feb 2021 18:22:02 +0200 Subject: [PATCH 6/6] Minor: Added type hints to conformers --- arc/species/conformers.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arc/species/conformers.py b/arc/species/conformers.py index 720816c342..8b5a5cb6bd 100644 --- a/arc/species/conformers.py +++ b/arc/species/conformers.py @@ -808,7 +808,11 @@ def determine_dihedrals(conformers, torsions): return conformers -def determine_torsion_sampling_points(label, torsion_angles, smeared_scan_res=None, symmetry=1): +def determine_torsion_sampling_points(label: str, + torsion_angles: list, + smeared_scan_res: Optional[float] = None, + symmetry: int = 1, + ) -> Tuple[list. list]: """ Determine how many points to consider in each well of a torsion for conformer combinations. @@ -819,10 +823,10 @@ def determine_torsion_sampling_points(label, torsion_angles, smeared_scan_res=No symmetry (int, optional): The torsion symmetry number. Returns: - list: Sampling points for the torsion. - Returns: - list: Each entry is a well dictionary with the keys - ``start_idx``, ``end_idx``, ``start_angle``, ``end_angle``, ``angles``. + Tuple[list. list]: + - Sampling points for the torsion. + - list: Each entry is a well dictionary with the keys + ``start_idx``, ``end_idx``, ``start_angle``, ``end_angle``, ``angles``. """ smeared_scan_res = smeared_scan_res or SMEARED_SCAN_RESOLUTIONS sampling_points = list() @@ -1391,7 +1395,10 @@ def rdkit_force_field(label, rd_mol, force_field='MMFF94s', optimize=True): return xyzs, energies -def get_wells(label, angles, blank=20): +def get_wells(label: str, + angles: list, + blank: int = 20, + ) -> list: """ Determine the distinct wells from a list of angles.