From 2a0e19faa1ca69ae90bff6348e5a584560d4199b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 17:13:50 -0400 Subject: [PATCH] Add docs to workflow of conformer_generation Add rst files and clean up docstrings --- .../reference/conformer_generation/index.rst | 1 + .../workflow/generators.rst | 7 + .../conformer_generation/workflow/index.rst | 10 + .../conformer_generation/workflow/metrics.rst | 7 + .../conformer_generation/workflow/sampler.rst | 7 + .../workflow/ts_generators.rst | 7 + rdmc/conformer_generation/generators.py | 202 ++++++++++++---- rdmc/conformer_generation/metrics.py | 64 +++-- rdmc/conformer_generation/sampler.py | 219 +++++++++++------- rdmc/conformer_generation/ts_generators.py | 124 +++++++--- 10 files changed, 472 insertions(+), 176 deletions(-) create mode 100644 docs/source/reference/conformer_generation/workflow/generators.rst create mode 100644 docs/source/reference/conformer_generation/workflow/index.rst create mode 100644 docs/source/reference/conformer_generation/workflow/metrics.rst create mode 100644 docs/source/reference/conformer_generation/workflow/sampler.rst create mode 100644 docs/source/reference/conformer_generation/workflow/ts_generators.rst diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index 4faf5535..60321da6 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -12,4 +12,5 @@ rdmc.conformer_generation embedding_geometries/index geometry_optimization/index postprocessing/index + workflow/index utils diff --git a/docs/source/reference/conformer_generation/workflow/generators.rst b/docs/source/reference/conformer_generation/workflow/generators.rst new file mode 100644 index 00000000..bbe1c678 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/generators.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.generators 
+===================================== + +.. automodule:: rdmc.conformer_generation.generators + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/index.rst b/docs/source/reference/conformer_generation/workflow/index.rst new file mode 100644 index 00000000..fc3a1b24 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/index.rst @@ -0,0 +1,10 @@ +Workflow +===================== + +.. toctree:: + :maxdepth: 2 + + generators + ts_generators + metrics + sampler diff --git a/docs/source/reference/conformer_generation/workflow/metrics.rst b/docs/source/reference/conformer_generation/workflow/metrics.rst new file mode 100644 index 00000000..5ae1cb07 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/metrics.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.metrics +===================================== + +.. automodule:: rdmc.conformer_generation.metrics + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/sampler.rst b/docs/source/reference/conformer_generation/workflow/sampler.rst new file mode 100644 index 00000000..2d783700 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/sampler.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.sampler +======================================= + +.. automodule:: rdmc.conformer_generation.sampler + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/ts_generators.rst b/docs/source/reference/conformer_generation/workflow/ts_generators.rst new file mode 100644 index 00000000..076f0fad --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/ts_generators.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.ts_generators +======================================= + +.. 
automodule:: rdmc.conformer_generation.ts_generators + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/rdmc/conformer_generation/generators.py b/rdmc/conformer_generation/generators.py index 1c6bce59..65bd17e0 100644 --- a/rdmc/conformer_generation/generators.py +++ b/rdmc/conformer_generation/generators.py @@ -4,15 +4,17 @@ """ Modules for conformer generation workflows """ +import logging +from time import time +from typing import Optional + +import numpy as np from rdmc.mol import RDKitMol from .embedders import * from .pruners import * from .optimizers import * from .metrics import * -import numpy as np -import logging -from time import time logging.basicConfig( @@ -26,23 +28,61 @@ class StochasticConformerGenerator: """ A module for stochastic conformer generation. The workflow follows an embed -> optimize -> prune cycle with custom stopping criteria. Additional final modules can be added at the user's discretion. + + Args: + smiles (str): SMILES input for which to generate conformers. + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + estimator (Estimator, optional): Instance of an :obj:`Estimator `. Available option is + :obj:`ConfSolv `. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + min_iters (int, optional): Minimum number of iterations for which to run the module. + max_iters (int, optional}: Maximum number of iterations for which to run the module. 
+ final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. """ - def __init__(self, smiles, embedder=None, optimizer=None, estimator=None, pruner=None, - metric=None, min_iters=None, max_iters=None, final_modules=None, - config=None, track_stats=False): + def __init__(self, + smiles, + embedder: Optional['ConfGenEmbedder'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, + estimator: Optional['Estimator'] = None, + pruner: Optional['ConfGenPruner'] = None, + metric: Optional['SCGMetric'] = None, + min_iters: Optional[int] = None, + max_iters: Optional[int] = None, + final_modules: Optional[list] = None, + config: Optional[dict] = None, + track_stats: bool = False): """ - Generate an RDKitMol Molecule instance from a RDKit ``Chem.rdchem.Mol`` or ``RWMol`` molecule. + Initialize the StochasticConformerGenerator module. Args: smiles (str): SMILES input for which to generate conformers. - embedder (class): Instance of an embedder from embedders.py. - optimizer (class): Instance of a optimizer from optimizers.py. - estimator (class): Any energy estimator instance. - pruner (class): Instance of a pruner from pruners.py. - metric (class): Instance of a metric from metrics.py. - min_iters (int): Minimum number of iterations for which to run the module (default=5). - max_iters (int}: Maximum number of iterations for which to run the module (default=100). - final_modules (List): List of instances of optimizer/pruner to run after initial cycles complete. + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + estimator (Estimator, optional): Instance of an :obj:`Estimator `. Available option is + :obj:`ConfSolv `. 
+ pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + min_iters (int, optional): Minimum number of iterations for which to run the module. + max_iters (int, optional}: Maximum number of iterations for which to run the module. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") @@ -74,8 +114,19 @@ def __init__(self, smiles, embedder=None, optimizer=None, estimator=None, pruner if isinstance(self.pruner, TorsionPruner): self.pruner.initialize_torsions_list(smiles) - def __call__(self, n_conformers_per_iter, **kwargs): + def __call__(self, + n_conformers_per_iter: int, + **kwargs, + ) -> List[dict]: + """ + Run the workflow for stochastic conformer generation. + + Args: + n_conformers_per_iter (int): The number of conformers to generate per iteration. + Returns: + unique_mol_data (List[dict]): A list of dictionaries containing the unique conformers. + """ self.logger.info(f"Generating conformers for {self.smiles}") time_start = time() for _ in range(self.max_iters): @@ -131,9 +182,46 @@ def __call__(self, n_conformers_per_iter, **kwargs): return unique_mol_data - def set_config(self, config, embedder=None, optimizer=None, pruner=None, metric=None, final_modules=None, - min_iters=None, max_iters=None): + def set_config(self, + config: str, + embedder: Optional['ConfGenEmbedder'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, + pruner: Optional['ConfGenPruner'] = None, + metric: Optional['SCGMetric'] = None, + min_iters: Optional[int] = None, + max_iters: Optional[int] = None, + final_modules: Optional[list] = None, + ): + """ + Set the configuration for the conformer generator with pre-defined options: ``"loose"`` and ``"normal"``. 
+ Args: + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + Defaults to :obj:`ETKDGEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. Defaults to + :obj:`XTBOptimizer ` with ``"gff"`` method. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. By default, + ``"loose"`` utilizes :obj:`TorsionPruner ` with + ``mean_chk_threshold=20`` and ``max_chk_threshold=30``, and ``"normal"`` utilizes + :obj:`CRESTPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + By default, both modes use ``"entropy"`` metric, while ``"loose"`` with ``window=3`` and ``threshold=0.05``, + and ``"normal"`` with ``window=5`` and ``threshold=0.01``. + min_iters (int, optional): Minimum number of iterations for which to run the module. Defaults to ``3`` for ``"loose"`` and ``5`` for ``"normal"``. + max_iters (int, optional): Maximum number of iterations for which to run the module. Defaults to ``20`` for ``"loose"`` and ``100`` for ``"normal"``. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. By default, no final modules are used for ``"loose"``. + For ``"normal"``, two :obj:`CRESTPruner ` with ``ewin=12`` and ``ewin=6``, are introduced + before and after a :obj:`XTBOptimizer ` with ``"gfn2"`` method + and ``"vtight"`` level. 
+ """ if config == "loose": self.embedder = ETKDGEmbedder() if not embedder else embedder self.optimizer = XTBOptimizer(method="gff") if not optimizer else optimizer @@ -156,37 +244,63 @@ def set_config(self, config, embedder=None, optimizer=None, pruner=None, metric= self.min_iters = 5 if not min_iters else min_iters self.max_iters = 100 if not max_iters else max_iters -class ConformerGenerator(): +class ConformerGenerator: + """ + A module for conformer generation. The workflow follows an embed -> optimize -> prune cycle with + custom stopping criteria. Additional final modules can be added at the user's discretion. + + Args: + smiles (str): SMILES input for which to generate conformers. + multiplicity (int, optional): The spin multiplicity of the species. Defaults to ``None``, + which will be interpreted from molecule generated by the `smiles`. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (Verifier, optional): Instance of a :obj:`Verifier `. + Available option is :obj:`XTBFrequencyVerifier `. + sampler (TorsionalSampler, optional): Instance of a :obj:`TorsionalSampler `. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. 
+ """ def __init__(self, smiles: str, multiplicity: Optional[int] = None, - optimizer: Optional['Optimizer'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, pruner: Optional['ConfGenPruner'] = None, verifiers: Optional[Union['Verifier',List['Verifier']]] = None, sampler: Optional['TorisonalSampler'] = None, - final_modules: Optional[Union['Optimizer','Verifier']] = None, + final_modules: Optional[Union['ConfGenOptimizer','Verifier']] = None, save_dir: Optional[str] = None, ) -> 'ConformerGenerator': """ - Initiate the conformer generator object. The best practice is set all information here + Initiate the conformer generator object. The best practice is set all information here. + Args: - smiles (str): The SMILES of the species. - multiplicity (int, optional): The spin multiplicity of the species. The spin multiplicity will be interpreted from the smiles if this - is not given by the user. - optimizer (GaussianOptimizer, optional): The optimizer used to optimize geometries. - pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. - verifiers (XTBFrequencyVerifier, optional): The verifier used to verify the obtained conformer. - sampler (TorisonalSampler, optional): The sampler used to do automated conformer search for the obtained conformer. - final_modules (Optimizer, Verifier, optional): The final modules can include optimizer in different LoT than previous - one and verifier used to verify the obtained conformer. + smiles (str): SMILES input for which to generate conformers. + multiplicity (int, optional): The spin multiplicity of the species. Defaults to ``None``, + which will be interpreted from molecule generated by the `smiles`. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. 
+ pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (Verifier, optional): Instance of a :obj:`Verifier `. + Available option is :obj:`XTBFrequencyVerifier `. + sampler (TorsionalSampler, optional): Instance of a :obj:`TorsionalSampler `. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") self.smiles = smiles if multiplicity: self.multiplicity = multiplicity - else: + else: mol = RDKitMol.FromSmiles(smiles) mul = mol.GetSpinMultiplicity() self.multiplicity = mul @@ -203,12 +317,14 @@ def __init__(self, def embed_stable_species(self, smiles: str, n_conformers: int = 20, - ) -> 'rdmc.RDKitMol': + ) -> 'RDKitMol': """ Embed the well conformer according to the SMILES provided. + Args: smiles (str): The well conformer SMILES. - n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to 20. + n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to ``20``. + Returns: An RDKitMol of the well conformer with 3D geometry embedded. """ @@ -243,18 +359,18 @@ def embed_stable_species(self, return mol def set_filter(self, - mol: 'RDKitMol', + mol: RDKitMol, n_conformers: int, ) -> list: """ - Assign the indices of reactions to track wheter the conformers are passed to the following steps. + Assign the indices of conformers to track whether the conformers are passed to the following steps. Args: mol ('RDKitMol'): The stable species in RDKitMol object with 3D geometries embedded. n_conformers (int): The maximum number of conformers to be passed to the following steps. Returns: - An RDKitMol with KeepIDs having `True` values to be passed to the following steps. 
+ RDKitMol: with ``KeepIDs`` as a list of ``True`` and ``False`` indicating whether a conformer passes the check. """ energy_dict = mol.energy KeepIDs = mol.KeepIDs @@ -275,10 +391,10 @@ def __call__(self, Run the workflow of well conformer generation. Args: - n_conformers (int): The maximum number of conformers to be generated. Defaults to 20. - n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to 20. - n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to 1. - n_refines (int): The maximum number of conformers to be passed to the final modeuls. Defaults to 1. + n_conformers (int): The maximum number of conformers to be generated. Defaults to ``20``. + n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to ``20``. + n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to ``1``. + n_refines (int): The maximum number of conformers to be passed to the final modules. Defaults to ``1``. """ if self.save_dir: @@ -342,4 +458,4 @@ def __call__(self, with open(os.path.join(self.save_dir, "workflow_check_ids.pkl"), "wb") as f: pickle.dump(opt_mol.KeepIDs, f) - return opt_mol \ No newline at end of file + return opt_mol diff --git a/rdmc/conformer_generation/metrics.py b/rdmc/conformer_generation/metrics.py index 1e66d526..da51070f 100644 --- a/rdmc/conformer_generation/metrics.py +++ b/rdmc/conformer_generation/metrics.py @@ -6,7 +6,7 @@ """ import numpy as np -from typing import Optional +from typing import List, Optional R = 0.0019872 # kcal/(K*mol) @@ -14,23 +14,34 @@ class SCGMetric: """ A class to calculate and track the given metric ("entropy", "partition function", or "total conformers") for a molecule over time. + + Args: + metric (str, optional): Metric to be calculated. Options are ``"entropy"``, ``"partition function"``, or ``"total conformers"``. + Defaults to ``"entropy"``. 
+ window (int, optional): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). + Defaults to ``5``. + threshold (float, optional): Threshold for the change in metric to decide when to stop generating conformers. + Defaults to ``0.01``. + T (float, optional): Temperature for entropy or partition function calculations. Defaults to ``298`` K. """ - def __init__( - self, - metric: Optional[str] = "entropy", - window: Optional[int] = 5, - threshold: Optional[float] = 0.01, - T: Optional[float] = 298, - ): + def __init__(self, + metric: Optional[str] = "entropy", + window: Optional[int] = 5, + threshold: Optional[float] = 0.01, + T: Optional[float] = 298, + ): """ Generate an SCGMetric instance. Args: - metric (str): Metric to be calculated. - window (int): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). - threshold (float): Threshold for the change in metric to decide when to stop generating conformers. - T (float): Temperature for entropy or partition function calculations. + metric (str, optional): Metric to be calculated. Options are ``"entropy"``, ``"partition function"``, or ``"total conformers"``. + Defaults to ``"entropy"``. + window (int, optional): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). + Defaults to ``5``. + threshold (float, optional): Threshold for the change in metric to decide when to stop generating conformers. + Defaults to ``0.01``. + T (float, optional): Temperature for entropy or partition function calculations. Defaults to ``298`` K. """ self.metric = metric self.window = window @@ -38,8 +49,14 @@ def __init__( self.T = T self.metric_history = [] - def calculate_metric(self, mol_data): + def calculate_metric(self, + mol_data: List[dict]): + """ + Calculate the metric for a given molecule. The calculated value will be appended to the ``metric_history`` list. 
+ Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. + """ if self.metric == "entropy": metric_val = self.calculate_entropy(mol_data) @@ -55,7 +72,12 @@ def calculate_metric(self, mol_data): self.metric_history.append(metric_val) def check_metric(self): + """ + Check if the change in metric is below the threshold. + Returns: + bool: ``True`` if the change in metric is below the threshold, ``False`` otherwise. + """ if self.metric == "total conformers": return False else: @@ -66,8 +88,14 @@ def check_metric(self): ) return True if change <= self.threshold else False - def calculate_entropy(self, mol_data): + def calculate_entropy(self, + mol_data: List[dict]): + """ + Calculate the entropy of a molecule. + Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. + """ energies = np.array([c["energy"] for c in mol_data]) energies = energies - energies.min() _prob = np.exp(-energies / (R * self.T)) @@ -75,8 +103,14 @@ def calculate_entropy(self, mol_data): entropy = -R * np.sum(prob * np.log(prob)) return entropy - def calculate_partition_function(self, mol_data): + def calculate_partition_function(self, + mol_data: List[dict]): + """ + Calculate the partition function of a molecule. + Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. 
+ """ energies = np.array([c["energy"] for c in mol_data]) energies = energies - energies.min() prob = np.exp(-energies / (R * self.T)) diff --git a/rdmc/conformer_generation/sampler.py b/rdmc/conformer_generation/sampler.py index 68b013ef..2aba1cc6 100644 --- a/rdmc/conformer_generation/sampler.py +++ b/rdmc/conformer_generation/sampler.py @@ -16,7 +16,6 @@ import numpy as np from scipy import constants from rdkit import Chem -import seaborn as sns import matplotlib.pyplot as plt from matplotlib.patches import Rectangle @@ -25,49 +24,83 @@ from rdmc.mathlib.greedymin import search_minimum from rdmc.ts import get_formed_and_broken_bonds -from xtb.libxtb import VERBOSITY_FULL, VERBOSITY_MINIMAL, VERBOSITY_MUTED -from xtb.utils import get_method, _methods -from xtb.interface import Calculator +try: + from xtb.libxtb import VERBOSITY_FULL, VERBOSITY_MINIMAL, VERBOSITY_MUTED + from xtb.utils import get_method, _methods + from xtb.interface import Calculator +except ImportError: + print("No xtb-python installation detected. Skipping import...") try: import scine_sparrow import scine_utilities as su except: - print("No scine_sparrow installation deteced. Skipping import...") + print("No scine_sparrow installation detected. Skipping import...") class TorsionalSampler: """ A class to find possible conformers by sampling the PES for each torsional pair. - You have to have the Spharrow and xtb-python packages installed to run this workflow. + You have to have the `Sparrow `_ and `xtb-python `_ packages installed to run this workflow. + + Args: + method (str, optional): The method to be used for automated conformer search. Only the methods available in Spharrow and xtb-python can be used. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + n_point_each_torsion (float, optional): Number of points to be sampled along each rotational mode. 
Defaults to ``45.``. + n_dimension (int, optional): Number of dimensions. Defaults to ``2``. If ``-1`` is assigned, the number of dimension would be the number of rotatable bonds. + optimizer (ConfGenOptimizer or TSOptimizer, optional): The optimizer used to optimize TS or stable specials geometries. Available options for + `TSOptimizer ` + are :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + and :obj:`GaussianOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier, Verifier, list of TSVerifiers or list of Verifiers, optional): The verifier or a list of verifiers used to verify the obtained conformer. Available + options are + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, and + :obj:`XTBFrequencyVerifier `. """ - def __init__( - self, - method: str = "GFN2-xTB", - nprocs: int = 1, - memory: int = 1, - n_point_each_torsion: int = 45, - n_dimension: int = 2, - optimizer: Optional[Union["XTBOptimizer", "TSOptimizer", "Optimizer"]] = None, - pruner: Optional["ConfGenPruner"] = None, - verifiers: Optional[Union["TSVerifier", "Verifier", List["TSVerifier"], List["Verifier"]]] = None, - ): + def __init__(self, + method: str = "GFN2-xTB", + nprocs: int = 1, + memory: int = 1, + n_point_each_torsion: int = 45, + n_dimension: int = 2, + optimizer: Optional[Union["ConfGenOptimizer","TSOptimizer"]] = None, + pruner: Optional["ConfGenPruner"] = None, + verifiers: Optional[Union["TSVerifier", + "Verifier", + List["TSVerifier"], + List["Verifier"]]] = None, + ): """ Initiate the TorsionalSampler class object. + Args: method (str, optional): The method to be used for automated conformer search. Only the methods available in Spharrow and xtb-python can be used. - Defaults to GFN2-xTB. - nprocs (int, optional): The number of processors to use. Defaults to 1. 
- memory (int, optional): Memory in GB used by Gaussian. Defaults to 1. - n_point_each_torsion (int): Number of points to be sampled along each rotational mode. Defaults to 45. - n_dimension (int): Number of dimensions. Defaults to 2. If `-1` is assigned, the n_dimension would be the number of rotatable bonds. - optimizer (XTBOptimizer, TSOptimizer or Optimizer, optional): The optimizer used to optimize TS or stable specials geometries. Available options for `TSOptimizer` - are `SellaOptimizer`, `OrcaOptimizer`, and `GaussianOptimizer`. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + n_point_each_torsion (float, optional): Number of points to be sampled along each rotational mode. Defaults to ``45.``. + n_dimension (int, optional): Number of dimensions. Defaults to ``2``. If ``-1`` is assigned, the number of dimension would be the number of rotatable bonds. + optimizer (ConfGenOptimizer or TSOptimizer, optional): The optimizer used to optimize TS or stable specials geometries. Available options for + `TSOptimizer ` + are :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + and :obj:`GaussianOptimizer `. pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. verifiers (TSVerifier, Verifier, list of TSVerifiers or list of Verifiers, optional): The verifier or a list of verifiers used to verify the obtained conformer. Available - options are `GaussianIRCVerifier`, `OrcaIRCVerifier`, and `XTBFrequencyVerifier`. + options are + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, and + :obj:`XTBFrequencyVerifier `. 
""" self.logger = logging.getLogger(f"{self.__class__.__name__}") self.method = method @@ -79,28 +112,28 @@ def __init__( self.pruner = pruner self.verifiers = [] if not verifiers else verifiers - def get_conformers_by_change_torsions( - self, - mol: RDKitMol, - id: int = 0, - torsions: List = None, - exclude_methyl: bool = True, - on_the_fly_check: bool = True, - ) -> List[RDKitMol]: + def get_conformers_by_change_torsions(self, + mol: RDKitMol, + id: int = 0, + torsions: Optional[list] = None, + exclude_methyl: bool = True, + on_the_fly_check: bool = True, + ) -> List[RDKitMol]: """ Generate conformers by rotating the angles of the torsions. A on-the-fly check can be applied, which identifies the conformers with colliding atoms. Args: mol (RDKitMol): A RDKitMol molecule object. - id (int): The ID of the conformer to be obtained. Defaults to 0. - torsions (list): A list of four-atom-index lists indicating the torsional modes. - exclude_methyl (bool): Whether exclude the torsions with methyl groups. Defaults to False. - If `torsions` is provided, this function won't work. - on_the_fly_filter (bool): Whether to check colliding atoms on the fly. Defaults to True. + id (int): The ID of the conformer to be obtained. Defaults to ``0``. + torsions (list): A list of four-atom-index lists indicating the torsional modes. Defaults to ``None``, + which means all the rotatable bonds will be used. + exclude_methyl (bool): Whether exclude the torsions with methyl groups. Defaults to ``False``. + Only valid if ``torsions`` is not provided. + on_the_fly_filter (bool): Whether to check colliding atoms on the fly. Defaults to ``True``. Returns: - A list of RDKitMol of sampled 3D geometries for each torsional mode. + lis: A list of RDKitMol of sampled 3D geometries for each torsional mode. 
""" conf = mol.Copy().GetConformer(id=id) origin_coords = mol.GetPositions(id=id) @@ -167,30 +200,31 @@ def get_conformers_by_change_torsions( return conformers_by_change_torsions - def __call__( - self, - mol: RDKitMol, - id: int, - rxn_smiles: Optional[str] = None, - torsions: Optional[List] = None, - no_sample_dangling_bonds: bool = True, - no_greedy: bool = False, - save_dir: Optional[str] = None, - save_plot: bool = True, - ): + def __call__(self, + mol: RDKitMol, + id: int, + rxn_smiles: Optional[str] = None, + torsions: Optional[List] = None, + no_sample_dangling_bonds: bool = True, + no_greedy: bool = False, + save_dir: Optional[str] = None, + save_plot: bool = True, + ): """ Run the workflow of conformer generation. Args: mol (RDKitMol): An RDKitMol object. id (int): The ID of the conformer to be obtained. - rxn_smiles (str, optional): The SMILES of the reaction. The SMILES should be formatted similar to `"reactant1.reactant2>>product1.product2."`. + rxn_smiles (str, optional): The SMILES of the reaction. The SMILES should be formatted similar to + `"reactant1.reactant2>>product1.product2."`. Defaults to ``None``, which means + ``torsions`` will be provided and used to generate conformers. torsions (list, optional): A list of four-atom-index lists indicating the torsional modes. - no_sample_dangling_bonds (bool): Whether to sample dangling bonds. Defaults to False. - no_greedy (bool): Whether to use greedy algorithm to find local minima. If `True`, all the sampled conformers - would be passed to the optimization and verification steps. Defaults to False. + no_sample_dangling_bonds (bool): Whether to sample dangling bonds. Defaults to ``False``. + no_greedy (bool): Whether to use greedy algorithm to find local minima. If ``True``, all the sampled conformers + would be passed to the optimization and verification steps. Defaults to ``False``. save_dir (str or Pathlike object, optional): The path to save the outputs generated during the generation. 
- save_plot (bool): Whether to save the heat plot for the PES of each torsinal mode. Defaults to True. + save_plot (bool): Whether to save the heat plot for the PES of each torsional mode. Defaults to ``True``. """ # Get bonds which will not be rotated during conformer searching sampler_mol = mol.Copy() @@ -395,27 +429,30 @@ def __call__( return mol -def get_separable_angle_list( - samplings: Union[List, Tuple], from_angles: Optional[Union[List, Tuple]] = None -) -> List[List]: +def get_separable_angle_list(samplings: Union[List, Tuple], + from_angles: Optional[Union[List, Tuple]] = None + ) -> List[List]: """ Get a angle list for each input dimension. For each dimension - The input can be a int, indicating the angles will be evenly sampled; - Or a list, indicate the angles to be sampled; - Examples: - [[120, 240,], 4, 0] => [[120, 240], - [0, 90, 180, 270], - [0]] - List of lists are returned for the sake of further calculation + The input can be a ``int`` indicating the angles will be evenly sampled; + or a ``list`` indicating the angles to be sampled; Args: samplings (Union[List, Tuple]): An array of sampling information. - For each element, it can be either list or int. + For each element, it can be either list or int. from_angles (Union[List, Tuple]): An array of initial angles. - If not set, angles will begin at zeros. + If not set, all angles will begin at zeros. Returns: list: A list of sampled angles sets. + + Examples: + + .. code-block:: python + + get_separable_angle_list([[120, 240,], 4, 0]) + >>> [[120, 240], [0, 90, 180, 270], [0]] + """ from_angles = from_angles or len(samplings) * [0.0] angle_list = [] @@ -444,15 +481,18 @@ def get_separable_angle_list( return angle_list -def get_energy(mol: RDKitMol, confId: int = 0, method: str = "GFN2-xTB") -> float: +def get_energy(mol: RDKitMol, + confId: int = 0, + method: str = "GFN2-xTB", + ) -> float: """ - Calculate the energy of the `RDKitMol` with given confId. The unit is in kcal/mol. 
- Only support methods already suported either in Spharrow or xtb-python. + Calculate the energy of the ``RDKitMol`` with given ``confId``. The unit is in kcal/mol. + Only support methods already supported either in sparrow or xtb-python. Args: mol (RDKitMol): A RDKitMol molecule object. - confId (int): The ID of the conformer for calculating energy. Defaults to 0. - method (str): Which semiempirical method to be used in running energy calcualtion. Defaults to "GFN2-xTB". + confId (int): The ID of the conformer for calculating energy. Defaults to ``0``. + method (str): Which semi-empirical method to be used in running energy calculation. Defaults to ``"GFN2-xTB"``. Returns: The energy of the conformer. @@ -495,7 +535,7 @@ def get_energy(mol: RDKitMol, confId: int = 0, method: str = "GFN2-xTB") -> floa return energy -def preprocess_energies(energies: np.ndarray): +def preprocess_energies(energies: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Rescale the energy based on the lowest energy. @@ -503,7 +543,7 @@ def preprocess_energies(energies: np.ndarray): energies (np.ndarray): A np.ndarray containing the energies for each sampled point. Returns: - The rescaled energies and the mask pointing out positions having values + tuple: The rescaled energies and the mask pointing out positions having values """ max_energy = np.nanmax(energies) min_energy = np.nanmin(energies) @@ -518,15 +558,26 @@ def preprocess_energies(energies: np.ndarray): return rescaled_energies, mask -def plot_heat_map( - energies: np.ndarray, - minimum_points: List[Tuple], - save_path: str, - mask: np.ndarray = None, - detailed_view: bool = False, - title: str = None, -): - """Plot and save the heat map of a given PES.""" +def plot_heat_map(energies: np.ndarray, + minimum_points: List[Tuple], + save_path: str, + mask: Optional[np.ndarray] = None, + detailed_view: bool = False, + title: Optional[str] = None, + ): + """ + Plot and save the heat map of a given PES. 
+ + Args: + energies (np.ndarray): A ``np.ndarray`` containing the energies for each sampled point. + minimum_points (List[Tuple]): A list of tuples containing the indices of the minimum points. + save_path (str): The path to save the plot. + mask (np.ndarray, optional): A ``np.ndarray`` containing the mask for the energies. + detailed_view (bool): Whether to plot the detailed view of the PES. Defaults to ``False``. + title (str, optional): The title of the plot. + """ + import seaborn as sns + if detailed_view: fig_size = (28, 20) annot = True # detailed view diff --git a/rdmc/conformer_generation/ts_generators.py b/rdmc/conformer_generation/ts_generators.py index d6dda4b7..394a83e2 100644 --- a/rdmc/conformer_generation/ts_generators.py +++ b/rdmc/conformer_generation/ts_generators.py @@ -2,15 +2,16 @@ #-*- coding: utf-8 -*- """ -Modules for ts conformer generation workflows +Modules for TS conformer generation workflows. """ -import os -import numpy as np import logging -import random +import os import pickle from typing import List, Optional, Union +import random + +import numpy as np from rdmc.conformer_generation.utils import * from rdmc.conformer_generation.generators import StochasticConformerGenerator @@ -21,12 +22,47 @@ class TSConformerGenerator: """ The class used to define a workflow for generating a set of TS conformers. + + Args: + rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to ``"reactant1.reactant2>>product1.product2."``. + multiplicity (int, optional): The spin multiplicity of the reaction. The spin multiplicity will be interpreted from the reaction smiles if this + is not given by the user. + use_smaller_multiplicity (bool, optional): Whether to use the smaller multiplicity when the interpreted multiplicity from the reaction smiles is + inconsistent between reactants and products. Defaults to ``True``. + embedder (TSInitialGuesser, optional): Instance of a :obj:`TSInitialGuesser `. 
Available options are + :obj:`TSEGNNGuesser `, + :obj:`TSGCNGuesser `, + :obj:`AutoNEBGuesser `, + :obj:`RMSDPPGuesser `, and + :obj:`DEGSMGuesser `. + optimizer (TSOptimizer, optional): Instance of a :obj:`TSOptimizer `. Available options are + :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`QChemOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. + Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. + Instance of a :obj:`TSVerifier `. + Available options are + :obj:`XTBFrequencyVerifier `, + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, + :obj:`QChemIRCVerifier `, and + :obj:`TSScreener `. + sampler (TorsionalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. You can use + :obj:`TorsionalSampler ` to define your own sampler. + final_modules (TSOptimizer, TSVerifier or list of TSVerifiers, optional): The final modules can include optimizer in different LoT than previous + one and verifier(s) used to verify the obtained TS conformer. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. Defaults to ``None``. """ def __init__(self, rxn_smiles: str, multiplicity: Optional[int] = None, - use_smaller_multiplicity: Optional[bool] = True, + use_smaller_multiplicity: bool = True, embedder: Optional['TSInitialGuesser'] = None, optimizer: Optional['TSOptimizer'] = None, pruner: Optional['ConfGenPruner'] = None, @@ -36,32 +72,48 @@ def __init__(self, save_dir: Optional[str] = None, ) -> 'TSConformerGenerator': """ - Initiate the TS conformer generator object.
The best practice is set all information here + The class used to define a workflow for generating a set of TS conformers. Args: - rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to `"reactant1.reactant2>>product1.product2."`. + rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to ``"reactant1.reactant2>>product1.product2."``. multiplicity (int, optional): The spin multiplicity of the reaction. The spin multiplicity will be interpreted from the reaction smiles if this - is not given by the user. + is not given by the user. use_smaller_multiplicity (bool, optional): Whether to use the smaller multiplicity when the interpreted multiplicity from the reaction smiles is - inconsistent. - embedder (TSInitialGuesser, optional): The embedder used to generate TS initial guessers. Available options are `TSEGNNGuesser`, `TSGCNGuesser`. - `RMSDPPGuesser`, and `AutoNEBGuesser`. - optimizer (TSOptimizer, optional): The optimizer used to optimize TS geometries. Available options are `SellaOptimizer`, `OrcaOptimizer`, and - `GaussianOptimizer`. - pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. - verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. Available - options are `GaussianIRCVerifier`, `OrcaIRCVerifier`, and `XTBFrequencyVerifier`. - sampler (TorisonalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. + inconsistent between reactants and products. Defaults to ``True``. + embedder (TSInitialGuesser, optional): Instance of a :obj:`TSInitialGuesser `. Available options are + :obj:`TSEGNNGuesser `, + :obj:`TSGCNGuesser `, + :obj:`AutoNEBGuesser `, + :obj:`RMSDPPGuesser `, and + :obj:`DEGSMGuesser `. 
+ optimizer (TSOptimizer, optional): Instance of a :obj:`TSOptimizer `. Available options are + :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`QChemOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. + Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. + Instance of a :obj:`TSVerifier `. + Available options are + :obj:`XTBFrequencyVerifier `, + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, + :obj:`QChemIRCVerifier `, and + :obj:`TSScreener `. + sampler (TorsionalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. You can use + :obj:`TorsionalSampler ` to define your own sampler. final_modules (TSOptimizer, TSVerifier or list of TSVerifiers, optional): The final modules can include optimizer in different LoT than previous one and verifier(s) used to verify the obtained TS conformer. - save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. Defaults to ``None``. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") self.rxn_smiles = rxn_smiles if multiplicity: - self.multiplicity = multiplicity - else: + self.multiplicity = multiplicity + else: r_smi, p_smi = rxn_smiles.split(">>") r_mol = RDKitMol.FromSmiles(r_smi) p_mol = RDKitMol.FromSmiles(p_smi) @@ -99,10 +151,10 @@ def embed_stable_species(self, Args: smiles (str): The reactant or product complex in SMILES. if multiple molecules involve, - use `.` to separate them. + use ``"."`` to separate them.
Returns: - An RDKitMol of the reactant or product complex with 3D geometry embedded. + RDKitMol: An RDKitMol of the reactant or product complex with 3D geometry embedded. """ # Split the complex smiles into a list of molecule smiles smiles_list = smiles.split(".") @@ -166,11 +218,11 @@ def generate_seed_mols(self, Args: rxn_smiles (str): The reaction smiles of the reaction. - n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to 20. - shuffle (Bool, optional): Whether or not to shuffle the embedded mols. + n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to ``20``. + shuffle (bool, optional): Whether or not to shuffle the embedded mols. Defaults to ``False``. Returns: - list + list: A list of reactant/product pairs in ``RDKitMol``. """ # Convert SMILES to reactant and product complexes r_smi, p_smi = rxn_smiles.split(">>") @@ -234,16 +286,16 @@ def generate_seed_mols(self, def set_filter(self, ts_mol: 'RDKitMol', n_conformers: int, - ) -> list: + ) -> RDKitMol: """ - Assign the indices of reactions to track wheter the conformers are passed to the following steps. + Assign the indices of reactions to track whether the conformers are passed to the following steps. Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. n_conformers (int): The maximum number of conformers to be passed to the following steps. Returns: - An RDKitMol with KeepIDs having `True` values to be passed to the following steps. + RDKitMol: with ``KeepIDs`` as a list of ``True`` and ``False`` indicating whether a conformer passes the check. """ energy_dict = ts_mol.energy KeepIDs = ts_mol.KeepIDs @@ -259,15 +311,19 @@ def __call__(self, n_conformers: int = 20, n_verifies: int = 20, n_sampling: int = 1, - n_refines: int = 1): + n_refines: int = 1, + ) -> 'RDKitMol': """ Run the workflow of TS conformer generation. Args: - n_conformers (int): The maximum number of conformers to be generated. 
Defaults to 20. - n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to 20. - n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to 1. - n_refines (int): The maximum number of conformers to be passed to the final modeuls. Defaults to 1. + n_conformers (int): The maximum number of conformers to be generated. Defaults to ``20``. + n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to ``20``. + n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to ``1``. + n_refines (int): The maximum number of conformers to be passed to the final modules. Defaults to ``1``. + + Returns: + RDKitMol: The TS in RDKitMol object with 3D geometries embedded. """ if self.save_dir: