From 27ee2cd6cba86dbf8657566cba2e0b5d65569882 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 11:34:15 -0400 Subject: [PATCH 01/21] Create a directory in doc for API reference As the number of files in the RDMC repo keep growing, it makes more sense to keep a hierarchical structure for API references, instead of putting everything under the `/source` directly. --- docs/source/{rdmc.conf.rst => reference/conf.rst} | 0 .../{rdmc.forcefield.rst => reference/forcefield.rst} | 0 docs/source/{modules.rst => reference/index.rst} | 0 docs/source/{rdmc.mol.rst => reference/mol.rst} | 0 docs/source/reference/reaction.rst | 7 +++++++ docs/source/reference/ts.rst | 7 +++++++ docs/source/{rdmc.utils.rst => reference/utils.rst} | 0 docs/source/{rdmc.view.rst => reference/view.rst} | 0 8 files changed, 14 insertions(+) rename docs/source/{rdmc.conf.rst => reference/conf.rst} (100%) rename docs/source/{rdmc.forcefield.rst => reference/forcefield.rst} (100%) rename docs/source/{modules.rst => reference/index.rst} (100%) rename docs/source/{rdmc.mol.rst => reference/mol.rst} (100%) create mode 100644 docs/source/reference/reaction.rst create mode 100644 docs/source/reference/ts.rst rename docs/source/{rdmc.utils.rst => reference/utils.rst} (100%) rename docs/source/{rdmc.view.rst => reference/view.rst} (100%) diff --git a/docs/source/rdmc.conf.rst b/docs/source/reference/conf.rst similarity index 100% rename from docs/source/rdmc.conf.rst rename to docs/source/reference/conf.rst diff --git a/docs/source/rdmc.forcefield.rst b/docs/source/reference/forcefield.rst similarity index 100% rename from docs/source/rdmc.forcefield.rst rename to docs/source/reference/forcefield.rst diff --git a/docs/source/modules.rst b/docs/source/reference/index.rst similarity index 100% rename from docs/source/modules.rst rename to docs/source/reference/index.rst diff --git a/docs/source/rdmc.mol.rst b/docs/source/reference/mol.rst similarity index 100% rename from 
docs/source/rdmc.mol.rst rename to docs/source/reference/mol.rst diff --git a/docs/source/reference/reaction.rst b/docs/source/reference/reaction.rst new file mode 100644 index 00000000..d1d4e812 --- /dev/null +++ b/docs/source/reference/reaction.rst @@ -0,0 +1,7 @@ +rdmc.reaction +======== + +.. automodule:: rdmc.reaction + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/ts.rst b/docs/source/reference/ts.rst new file mode 100644 index 00000000..8e38603e --- /dev/null +++ b/docs/source/reference/ts.rst @@ -0,0 +1,7 @@ +rdmc.ts +======== + +.. automodule:: rdmc.ts + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/rdmc.utils.rst b/docs/source/reference/utils.rst similarity index 100% rename from docs/source/rdmc.utils.rst rename to docs/source/reference/utils.rst diff --git a/docs/source/rdmc.view.rst b/docs/source/reference/view.rst similarity index 100% rename from docs/source/rdmc.view.rst rename to docs/source/reference/view.rst From 3ac7f10f947b47c1b698ca5efcb8f277bb0e9f36 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 11:41:03 -0400 Subject: [PATCH 02/21] Add hierarchical structures of the submodule in /docs/source --- docs/source/rdmc.external.xyz2mol.rst | 7 ------- docs/source/rdmc.rst | 3 +++ .../{rdmc.external.rst => reference/external/index.rst} | 3 ++- docs/source/reference/external/inpwriter/gaussian.rst | 7 +++++++ docs/source/reference/external/inpwriter/index.rst | 7 +++++++ docs/source/reference/external/inpwriter/orca.rst | 7 +++++++ docs/source/reference/external/inpwriter/qchem.rst | 7 +++++++ docs/source/reference/external/inpwriter/utils.rst | 7 +++++++ docs/source/reference/external/logparser/base.rst | 7 +++++++ docs/source/reference/external/logparser/gaussian.rst | 7 +++++++ docs/source/reference/external/logparser/index.rst | 7 +++++++ docs/source/reference/external/logparser/orca.rst | 7 +++++++ 
docs/source/reference/external/logparser/qchem.rst | 7 +++++++ docs/source/reference/external/logparser/utils.rst | 7 +++++++ .../{rdmc.external.rmg.rst => reference/external/rmg.rst} | 0 docs/source/reference/mathlib/index.rst | 7 +++++++ 16 files changed, 89 insertions(+), 8 deletions(-) delete mode 100644 docs/source/rdmc.external.xyz2mol.rst rename docs/source/{rdmc.external.rst => reference/external/index.rst} (75%) create mode 100644 docs/source/reference/external/inpwriter/gaussian.rst create mode 100644 docs/source/reference/external/inpwriter/index.rst create mode 100644 docs/source/reference/external/inpwriter/orca.rst create mode 100644 docs/source/reference/external/inpwriter/qchem.rst create mode 100644 docs/source/reference/external/inpwriter/utils.rst create mode 100644 docs/source/reference/external/logparser/base.rst create mode 100644 docs/source/reference/external/logparser/gaussian.rst create mode 100644 docs/source/reference/external/logparser/index.rst create mode 100644 docs/source/reference/external/logparser/orca.rst create mode 100644 docs/source/reference/external/logparser/qchem.rst create mode 100644 docs/source/reference/external/logparser/utils.rst rename docs/source/{rdmc.external.rmg.rst => reference/external/rmg.rst} (100%) create mode 100644 docs/source/reference/mathlib/index.rst diff --git a/docs/source/rdmc.external.xyz2mol.rst b/docs/source/rdmc.external.xyz2mol.rst deleted file mode 100644 index 3bcd2b7d..00000000 --- a/docs/source/rdmc.external.xyz2mol.rst +++ /dev/null @@ -1,7 +0,0 @@ -rdmc.external.xyz2mol -===================== - -.. automodule:: rdmc.external.xyz2mol - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/rdmc.rst b/docs/source/rdmc.rst index 0ab2b3cf..7fdf2291 100644 --- a/docs/source/rdmc.rst +++ b/docs/source/rdmc.rst @@ -11,6 +11,7 @@ rdmc :maxdepth: 4 rdmc.external + rdmc.mathlib .. 
toctree:: @@ -19,5 +20,7 @@ rdmc rdmc.conf rdmc.forcefield rdmc.mol + rdmc.reaction + rdmc.ts rdmc.utils rdmc.view diff --git a/docs/source/rdmc.external.rst b/docs/source/reference/external/index.rst similarity index 75% rename from docs/source/rdmc.external.rst rename to docs/source/reference/external/index.rst index ce3ac76b..f5d15e86 100644 --- a/docs/source/rdmc.external.rst +++ b/docs/source/reference/external/index.rst @@ -11,4 +11,5 @@ rdmc.external :maxdepth: 4 rdmc.external.rmg - rdmc.external.xyz2mol + rdmc.external.inpwriter + rdmc.external.logparser diff --git a/docs/source/reference/external/inpwriter/gaussian.rst b/docs/source/reference/external/inpwriter/gaussian.rst new file mode 100644 index 00000000..0c8614e6 --- /dev/null +++ b/docs/source/reference/external/inpwriter/gaussian.rst @@ -0,0 +1,7 @@ +rdmc.external.inpwriter.gaussian +================= + +.. automodule:: rdmc.external.inpwriter.gaussian + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/inpwriter/index.rst b/docs/source/reference/external/inpwriter/index.rst new file mode 100644 index 00000000..9c5a127c --- /dev/null +++ b/docs/source/reference/external/inpwriter/index.rst @@ -0,0 +1,7 @@ +rdmc.external.inpwriter +================= + +.. automodule:: rdmc.external.inpwriter + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/inpwriter/orca.rst b/docs/source/reference/external/inpwriter/orca.rst new file mode 100644 index 00000000..738288f7 --- /dev/null +++ b/docs/source/reference/external/inpwriter/orca.rst @@ -0,0 +1,7 @@ +rdmc.external.inpwriter.orca +================= + +.. 
automodule:: rdmc.external.inpwriter.orca + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/inpwriter/qchem.rst b/docs/source/reference/external/inpwriter/qchem.rst new file mode 100644 index 00000000..944673b9 --- /dev/null +++ b/docs/source/reference/external/inpwriter/qchem.rst @@ -0,0 +1,7 @@ +rdmc.external.inpwriter.qchem +================= + +.. automodule:: rdmc.external.inpwriter.qchem + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/inpwriter/utils.rst b/docs/source/reference/external/inpwriter/utils.rst new file mode 100644 index 00000000..06027cd6 --- /dev/null +++ b/docs/source/reference/external/inpwriter/utils.rst @@ -0,0 +1,7 @@ +rdmc.external.inpwriter.utils +================= + +.. automodule:: rdmc.external.inpwriter.utils + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/base.rst b/docs/source/reference/external/logparser/base.rst new file mode 100644 index 00000000..7eed02bf --- /dev/null +++ b/docs/source/reference/external/logparser/base.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser.base +================= + +.. automodule:: rdmc.external.logparser.base + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/gaussian.rst b/docs/source/reference/external/logparser/gaussian.rst new file mode 100644 index 00000000..8f08f866 --- /dev/null +++ b/docs/source/reference/external/logparser/gaussian.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser.gaussian +================= + +.. 
automodule:: rdmc.external.logparser.gaussian + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/index.rst b/docs/source/reference/external/logparser/index.rst new file mode 100644 index 00000000..f9c9ead6 --- /dev/null +++ b/docs/source/reference/external/logparser/index.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser +================= + +.. automodule:: rdmc.external.logparser + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/orca.rst b/docs/source/reference/external/logparser/orca.rst new file mode 100644 index 00000000..c087ff63 --- /dev/null +++ b/docs/source/reference/external/logparser/orca.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser.orca +================= + +.. automodule:: rdmc.external.logparser.orca + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/qchem.rst b/docs/source/reference/external/logparser/qchem.rst new file mode 100644 index 00000000..22e5c2a4 --- /dev/null +++ b/docs/source/reference/external/logparser/qchem.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser.utils +================= + +.. automodule:: rdmc.external.logparser.utils + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/utils.rst b/docs/source/reference/external/logparser/utils.rst new file mode 100644 index 00000000..ca760160 --- /dev/null +++ b/docs/source/reference/external/logparser/utils.rst @@ -0,0 +1,7 @@ +rdmc.external.logparser.qchem +================= + +.. 
automodule:: rdmc.external.logparser.qchem + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/rdmc.external.rmg.rst b/docs/source/reference/external/rmg.rst similarity index 100% rename from docs/source/rdmc.external.rmg.rst rename to docs/source/reference/external/rmg.rst diff --git a/docs/source/reference/mathlib/index.rst b/docs/source/reference/mathlib/index.rst new file mode 100644 index 00000000..a38f0f99 --- /dev/null +++ b/docs/source/reference/mathlib/index.rst @@ -0,0 +1,7 @@ +rdmc.mathlib +========= + +.. automodule:: rdmc.mathlib + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file From 11bebf6773c0aa5ffd8063435d4d6cf4ff4158dc Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 11:51:21 -0400 Subject: [PATCH 03/21] tempfix to rdmc.rst --- docs/source/{ => reference}/rdmc.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename docs/source/{ => reference}/rdmc.rst (53%) diff --git a/docs/source/rdmc.rst b/docs/source/reference/rdmc.rst similarity index 53% rename from docs/source/rdmc.rst rename to docs/source/reference/rdmc.rst index 7fdf2291..668521b5 100644 --- a/docs/source/rdmc.rst +++ b/docs/source/reference/rdmc.rst @@ -10,17 +10,17 @@ rdmc .. toctree:: :maxdepth: 4 - rdmc.external - rdmc.mathlib + external/index + mathlib/index .. 
toctree:: :maxdepth: 4 - rdmc.conf - rdmc.forcefield - rdmc.mol - rdmc.reaction - rdmc.ts - rdmc.utils - rdmc.view + conf + forcefield + mol + reaction + ts + utils + view From cc7fb208956381e914453bc13aa84e2023332309 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 23:23:54 -0400 Subject: [PATCH 04/21] Correct the toctree for docs build --- docs/source/index.rst | 4 ++-- docs/source/reference/external/index.rst | 6 +++--- docs/source/reference/external/inpwriter/index.rst | 12 ++++++++++-- docs/source/reference/external/logparser/index.rst | 13 +++++++++++-- docs/source/reference/index.rst | 7 ------- 5 files changed, 26 insertions(+), 16 deletions(-) delete mode 100644 docs/source/reference/index.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 515d4656..a1e604c2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,7 @@ RDMC Documentation =================== -**RDMC (RDkit Molecule and Conformer)** is A light-weight wrapper for **RDKit** Molecule and Conformer related operations. RDkit is great! I like its versatility and speed, but +**RDMC (Reaction Data and Molecule Conformer)** is A light-weight wrapper for **RDKit** Molecule and Conformer related operations. RDkit is great! I like its versatility and speed, but personally, I find learning to use RDKit is not easy. I usually have to go back and forth to check if RDKit has certain methods and how to use them, since those molecule operations are usually located in different modules. I wrote this tiny thing majorly aiming to make my life easier and to provide a convenient tool so that users can just import a single module / class instead of remembering which method in what module. @@ -18,7 +18,7 @@ Contents .. 
toctree:: :maxdepth: 2 - rdmc + reference/rdmc license diff --git a/docs/source/reference/external/index.rst b/docs/source/reference/external/index.rst index f5d15e86..085e704d 100644 --- a/docs/source/reference/external/index.rst +++ b/docs/source/reference/external/index.rst @@ -10,6 +10,6 @@ rdmc.external .. toctree:: :maxdepth: 4 - rdmc.external.rmg - rdmc.external.inpwriter - rdmc.external.logparser + rmg + inpwriter/index + logparser/index diff --git a/docs/source/reference/external/inpwriter/index.rst b/docs/source/reference/external/inpwriter/index.rst index 9c5a127c..7f3ff2ff 100644 --- a/docs/source/reference/external/inpwriter/index.rst +++ b/docs/source/reference/external/inpwriter/index.rst @@ -1,7 +1,15 @@ rdmc.external.inpwriter -================= +======================== .. automodule:: rdmc.external.inpwriter :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: + +.. toctree:: + :maxdepth: 4 + + gaussian + qchem + orca + utils \ No newline at end of file diff --git a/docs/source/reference/external/logparser/index.rst b/docs/source/reference/external/logparser/index.rst index f9c9ead6..fab18242 100644 --- a/docs/source/reference/external/logparser/index.rst +++ b/docs/source/reference/external/logparser/index.rst @@ -1,7 +1,16 @@ rdmc.external.logparser -================= +======================= .. automodule:: rdmc.external.logparser :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: + +.. toctree:: + :maxdepth: 4 + + base + gaussian + qchem + orca + utils \ No newline at end of file diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst deleted file mode 100644 index 48a7f981..00000000 --- a/docs/source/reference/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -rdmc -==== - -.. 
toctree:: - :maxdepth: 4 - - rdmc From 092a6042034307bf91f3e7d884577c3b49162104 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 23:24:29 -0400 Subject: [PATCH 05/21] Remove warnings about "=" too short in docs build --- docs/source/reference/external/inpwriter/gaussian.rst | 2 +- docs/source/reference/external/inpwriter/orca.rst | 2 +- docs/source/reference/external/inpwriter/qchem.rst | 2 +- docs/source/reference/external/inpwriter/utils.rst | 2 +- docs/source/reference/external/logparser/base.rst | 2 +- docs/source/reference/external/logparser/gaussian.rst | 2 +- docs/source/reference/external/logparser/orca.rst | 2 +- docs/source/reference/external/logparser/qchem.rst | 6 +++--- docs/source/reference/external/logparser/utils.rst | 8 ++++---- docs/source/reference/mathlib/index.rst | 2 +- docs/source/reference/reaction.rst | 2 +- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/reference/external/inpwriter/gaussian.rst b/docs/source/reference/external/inpwriter/gaussian.rst index 0c8614e6..10b8c259 100644 --- a/docs/source/reference/external/inpwriter/gaussian.rst +++ b/docs/source/reference/external/inpwriter/gaussian.rst @@ -1,5 +1,5 @@ rdmc.external.inpwriter.gaussian -================= +================================= .. automodule:: rdmc.external.inpwriter.gaussian :members: diff --git a/docs/source/reference/external/inpwriter/orca.rst b/docs/source/reference/external/inpwriter/orca.rst index 738288f7..f1616dfb 100644 --- a/docs/source/reference/external/inpwriter/orca.rst +++ b/docs/source/reference/external/inpwriter/orca.rst @@ -1,5 +1,5 @@ rdmc.external.inpwriter.orca -================= +============================= .. 
automodule:: rdmc.external.inpwriter.orca :members: diff --git a/docs/source/reference/external/inpwriter/qchem.rst b/docs/source/reference/external/inpwriter/qchem.rst index 944673b9..9fec8a8f 100644 --- a/docs/source/reference/external/inpwriter/qchem.rst +++ b/docs/source/reference/external/inpwriter/qchem.rst @@ -1,5 +1,5 @@ rdmc.external.inpwriter.qchem -================= +============================== .. automodule:: rdmc.external.inpwriter.qchem :members: diff --git a/docs/source/reference/external/inpwriter/utils.rst b/docs/source/reference/external/inpwriter/utils.rst index 06027cd6..94261978 100644 --- a/docs/source/reference/external/inpwriter/utils.rst +++ b/docs/source/reference/external/inpwriter/utils.rst @@ -1,5 +1,5 @@ rdmc.external.inpwriter.utils -================= +============================== .. automodule:: rdmc.external.inpwriter.utils :members: diff --git a/docs/source/reference/external/logparser/base.rst b/docs/source/reference/external/logparser/base.rst index 7eed02bf..4687f208 100644 --- a/docs/source/reference/external/logparser/base.rst +++ b/docs/source/reference/external/logparser/base.rst @@ -1,5 +1,5 @@ rdmc.external.logparser.base -================= +============================ .. automodule:: rdmc.external.logparser.base :members: diff --git a/docs/source/reference/external/logparser/gaussian.rst b/docs/source/reference/external/logparser/gaussian.rst index 8f08f866..14ac34b2 100644 --- a/docs/source/reference/external/logparser/gaussian.rst +++ b/docs/source/reference/external/logparser/gaussian.rst @@ -1,5 +1,5 @@ rdmc.external.logparser.gaussian -================= +================================= .. 
automodule:: rdmc.external.logparser.gaussian :members: diff --git a/docs/source/reference/external/logparser/orca.rst b/docs/source/reference/external/logparser/orca.rst index c087ff63..2c75b56f 100644 --- a/docs/source/reference/external/logparser/orca.rst +++ b/docs/source/reference/external/logparser/orca.rst @@ -1,5 +1,5 @@ rdmc.external.logparser.orca -================= +============================ .. automodule:: rdmc.external.logparser.orca :members: diff --git a/docs/source/reference/external/logparser/qchem.rst b/docs/source/reference/external/logparser/qchem.rst index 22e5c2a4..27542772 100644 --- a/docs/source/reference/external/logparser/qchem.rst +++ b/docs/source/reference/external/logparser/qchem.rst @@ -1,7 +1,7 @@ -rdmc.external.logparser.utils -================= +rdmc.external.logparser.qchem +============================== -.. automodule:: rdmc.external.logparser.utils +.. automodule:: rdmc.external.logparser.qchem :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/external/logparser/utils.rst b/docs/source/reference/external/logparser/utils.rst index ca760160..89374df5 100644 --- a/docs/source/reference/external/logparser/utils.rst +++ b/docs/source/reference/external/logparser/utils.rst @@ -1,7 +1,7 @@ -rdmc.external.logparser.qchem -================= +rdmc.external.logparser.utils +============================== -.. automodule:: rdmc.external.logparser.qchem +.. automodule:: rdmc.external.logparser.utils :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: diff --git a/docs/source/reference/mathlib/index.rst b/docs/source/reference/mathlib/index.rst index a38f0f99..71b26dea 100644 --- a/docs/source/reference/mathlib/index.rst +++ b/docs/source/reference/mathlib/index.rst @@ -1,5 +1,5 @@ rdmc.mathlib -========= +============ .. 
automodule:: rdmc.mathlib :members: diff --git a/docs/source/reference/reaction.rst b/docs/source/reference/reaction.rst index d1d4e812..eec18656 100644 --- a/docs/source/reference/reaction.rst +++ b/docs/source/reference/reaction.rst @@ -1,5 +1,5 @@ rdmc.reaction -======== +============= .. automodule:: rdmc.reaction :members: From 7bc37d05da56be800bf3887294ff04cabf0db2c1 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 23:41:33 -0400 Subject: [PATCH 06/21] Correct the docstring for hierarchy_cluster --- rdmc/conf.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/rdmc/conf.py b/rdmc/conf.py index 371914b6..0b676540 100644 --- a/rdmc/conf.py +++ b/rdmc/conf.py @@ -596,25 +596,25 @@ def hierarchy_cluster(self, ): """ The implementation of an hierarchy clustering method based on scipy. - It is basically defining clusters based on points within a hypercube defined by threhold. - More details refer to: - https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fclusterdata.html + It is basically defining clusters based on points within a hypercube defined by threshold. + For more details, refer to `scipy <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fclusterdata.html>`_. Args: confs (list): A list of conformer IDs. - threshold (float, optional): The threshold (in degree) used for hierarchy clustering. Defaults to 5. - criterion (str, optional): Specifies the criterion for forming flat clusters. Valid values are ‘inconsistent’, - ‘distance’ (default), or ‘maxclust’ cluster formation algorithms - method (str, optional): The linkage method to use. Valid values are single, complete, average (default), - weighted, median centroid, and ward. Except median centroid (O(n^3)), others have a - computational cost scaled by O(n^2). - adjust_periodicity (bool, optional): Since dihedral angles have a period of 360 degrees. Defaults to ``True``. + threshold (float, optional): The threshold (in degree) used for hierarchy clustering. Defaults to ``5``. 
+ criterion (str, optional): Specifies the criterion for forming flat clusters. Valid values are ``'inconsistent'``, + ``'distance'`` (default), or ``'maxclust'`` cluster formation algorithms. + method (str, optional): The linkage method to use. Valid values are ``'single'``, ``'complete'``, ``'average'`` (default), + ``'weighted'``, ``'median centroid'``, and ``'ward'``. Except median centroid (:math:`O(n^3)`), others have a + computational cost scaled by :math:`O(n^2)`. + adjust_periodicity (bool, optional): Since dihedral angles have a period of 360 degrees, adjusting for periodicity helps + create better clustering results. Defaults to ``True``. It is suggested to run twice with this value be ``True`` and ``False`` to get a better performance. - as_dict (bool): Return the result as a dict object with keys for the index of clusters and values - of conformer indexes (provided in confs). Otherwise, return as a list of grouped + as_dict (bool): Return the result as a ``dict`` object with keys for the index of clusters and values + of conformer indexes (provided in ``confs``). Otherwise, return as a ``list`` of grouped conformer indexes. Defaults to ``True``. - as_list_idx (bool): Return the indexes in the `confs` list other than the value in the ``confs``. + as_list_idx (bool): Return the indexes in the ``confs`` list rather than the values in ``confs``. Default to ``False``. 
""" # Generate torsional matrix From 09b20819b015b2c867ce715c04e450ed6123e604 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 23:46:21 -0400 Subject: [PATCH 07/21] Correct auto_update_prop and get_mol to remove sphinx build errors --- rdmc/external/logparser/base.py | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/rdmc/external/logparser/base.py b/rdmc/external/logparser/base.py index 5b2b497b..312a145c 100644 --- a/rdmc/external/logparser/base.py +++ b/rdmc/external/logparser/base.py @@ -53,14 +53,20 @@ def auto_update_prop(update_fun: str): Args: update_fun (str): The method name for the update function. - Example: - @property - @auto_update_prop('update_prop1') - def prop1(self): - pass + Examples: + This decorator can be used as follows: + + .. code-block:: python + + @property + @auto_update_prop('update_prop1') + def prop1(self): + "docstring for prop1" + + def update_prop1(self): + # define how to get the value of prop1 + # and set the value to self._prop1 - def update_prop1(self): - # define self._prop1 """ def wrapper(func): @property @@ -405,27 +411,28 @@ def get_mol(self, sanitize: Optional[bool] = None, ) -> 'RDKitMol': """ - Perceive the xyzs in the file and turn the geometries to conformers. + Perceive the xyzs in the file, create a :func:`rdmc.mol.RDKitMol` and convert the geometries to its conformers. Args: refid (int): The conformer ID in the log file to be used as the reference for mol perception. - Defaults to -1, meaning it is determined by the following criteria: + Defaults to ``-1``, meaning it is determined by the following criteria: + - For opt, it is the last geometry if succeeded; otherwise, the initial geometry; - For freq, it is the geometry input; - For scan, it is the geometry input; - For IRC, uses the initial geometry if bidirectional job; uses the last converged geometry if uni-directional job. 
- embed_confs (bool): Whether to embed intermediate conformers in the file to the mol. - Defaults to ``True``. To clear, at least one conformer will be included in - obtained mol, and its geometry is determined by `refid`. - converged (bool): Whether to only embed converged conformers to the mol. This option + embed_confs (bool): Whether to embed intermediate conformers in the file to the obtained molecule. + Defaults to ``True``. To be clear, at least one conformer will be included in + obtained mol, and its geometry is determined by ``refid``. + converged (bool): Whether to only embed converged conformers to the obtained molecule. This option is only valid when ``embed_confs`` is ``True``. neglect_spin (bool): Whether to neglect the error when spin multiplicity are different between the generated mol and the value in the output file. This can be useful for calculations involves TS. Defaults to ``True``. backend (str): The backend engine for parsing XYZ. Defaults to ``'openbabel'``. - sanitize (bool): Whether to sanitize the generated mol. Defaults to `True`. - If a TS involved in the job, better to set it `False` + sanitize (bool): Whether to sanitize the generated molecule. Defaults to ``True``. + If a TS involved in the job, better to set it ``False``. Returns: RDKitMol: a molecule generated from the output file. From fd034a60df7b3b7699f22c101565f48e95c260d3 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Sep 2023 23:51:31 -0400 Subject: [PATCH 08/21] Correct SaturateXX in mol to solve errors in building docs using `::` and extra empty line to create a block --- rdmc/mol.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index b1cd1da5..9387aa47 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1481,8 +1481,10 @@ def SaturateBiradicalSites12(self, verbose: bool = True): """ A method help to saturate 1,2 biradicals to match the given - molecule spin multiplicity. 
E.g., + molecule spin multiplicity. E.g.:: + *C - C* => C = C + In the current implementation, no error will be raised, if the function doesn't achieve the goal. This function has not been been tested on nitrogenate. @@ -1563,8 +1565,10 @@ def SaturateBiradicalSitesCDB(self, """ A method help to saturate biradicals that have conjugated double bond in between to match the given molecule spin multiplicity. E.g, 1,4 biradicals can be saturated - if there is a unsaturated bond between them: + if there is a unsaturated bond between them:: + *C - C = C - C* => C = C - C = C + In the current implementation, no error will be raised, if the function doesn't achieve the goal. This function has not been been tested on nitrogenate. @@ -1572,8 +1576,9 @@ def SaturateBiradicalSitesCDB(self, Args: multiplicity (int): The target multiplicity. chain_length (int): How long the conjugated double bond chain is. - A larger value will result in longer time. defaults to 8. - verbose (int): Whether to print additional information. Defaults to ``True``. + A larger value will result in longer computational time. + Defaults to ``8``. + verbose (bool): Whether to print additional information. Defaults to ``True``. """ cur_multiplicity = self.GetSpinMultiplicity() if cur_multiplicity == multiplicity: @@ -1671,8 +1676,10 @@ def SaturateCarbene(self, verbose: bool = True): """ A method help to saturate carbenes and nitrenes to match the given - molecule spin multiplicity: + molecule spin multiplicity:: + *-C-* (triplet) => C-(**) (singlet) + In the current implementation, no error will be raised, if the function doesn't achieve the goal. This function has not been been tested on nitrogenate. @@ -1721,12 +1728,14 @@ def SaturateMol(self, verbose: bool = False): """ A method help to saturate the molecule to match the given - molecule spin multiplicity. 
This is just a wrapper to call both - `SaturateBiradicalSites12`, `SaturateBiradicalSitesCDB`, and - `SaturateCarbene`: + molecule spin multiplicity. This is just a wrapper to call + :func:`SaturateBiradicalSites12`, :func:`SaturateBiradicalSitesCDB`, and + :func:`SaturateCarbene`:: + *C - C* => C = C *C - C = C - C* => C = C - C = C *-C-* (triplet) => C-(**) (singlet) + In the current implementation, no error will be raised, if the function doesn't achieve the goal. This function has not been been tested on nitrogenate. @@ -1735,6 +1744,7 @@ def SaturateMol(self, multiplicity (int): The target multiplicity. chain_length (int): How long the conjugated double bond chain is. A larger value will result in longer time. + Defaults to ``8``. verbose (bool): Whether to print intermediate information. Defaults to ``False``. """ From c6b11e7cb5a89a30f2d8fbc9c0812b9b537ef147 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 8 Sep 2023 00:31:07 -0400 Subject: [PATCH 09/21] Correct docstrings in the TS module --- rdmc/ts.py | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/rdmc/ts.py b/rdmc/ts.py index ce7bc580..01c7e62d 100644 --- a/rdmc/ts.py +++ b/rdmc/ts.py @@ -115,18 +115,20 @@ def clean_ts(r_mol: 'RDKitMol', p_mol: 'RDKitMol', ts_mol: 'RDKitMol'): """ - Cleans transition state `ts_mol` by removing all bonds that correspond to broken or formed bonds. - `r_mol`, `p_mol`, and `ts_mol` need to be atom mapped. Bond order changes are not considered. + Cleans transition state ``ts_mol`` by removing all bonds that correspond to broken or formed bonds. + ``r_mol``, ``p_mol``, and ``ts_mol`` need to be atom mapped. Bond order changes are not considered. Args: r_mol (RDKitMol): the reactant complex. p_mol (RDKitMol): the product complex. - ts_mol (RDKitMol): the transition state corresponding to `r_mol` and `p_mol`. + ts_mol (RDKitMol): the transition state corresponding to ``r_mol`` and ``p_mol``. 
Returns: - RDKitMol: an edited version of ts_mol, which is the original `ts_mol` with cleaned bonding. - list: broken bonds: A list of length-2 tuples that contains the atom indexes of the bonds broken in the rxn. - formed bonds: A list of length-2 tuples that contains the atom indexes of the bonds formed in the rxn. + RDKitMol: an edited version of ``ts_mol``, which is the original ``ts_mol`` with cleaned bonding. + list: + + - broken bonds: A list of length-2 tuples that contains the atom indexes of the bonds broken in the reaction. + - formed bonds: A list of length-2 tuples that contains the atom indexes of the bonds formed in the reaction. """ r_bonds, p_bonds, ts_bonds = _get_bonds_as_sets(r_mol, p_mol, ts_mol) formed_bonds, broken_bonds = p_bonds - r_bonds, r_bonds - p_bonds @@ -225,27 +227,27 @@ def guess_rxn_from_normal_mode(xyz: np.array, backend: str = 'openbabel', multiplicity: int = 1): """ - Guess reaction according to the normal mode analysis for a TS. + Guess reaction according to the normal mode analysis for a transition state. Args: xyz (np.array): The xyz coordinates of the transition state. It should have a - size of N x 3. - symbols (np.array): The symbols of each atoms. It should have a size of N. + size of :math:`N \\times 3`. + symbols (np.array): The symbols of each atoms. It should have a size of :math:`N`. disp (np.array): The displacement of the normal mode. It should have a size of - N x 3. + :math:`N \\times 3`. amplitude (float): The amplitude of the motion. If a single value is provided then the guess - will be unique (if available). 0.25 will be the default. Otherwise, a list + will be unique (if available). ``0.25`` will be the default. Otherwise, a list can be provided, and all possible results will be returned. - weights (bool or np.array): If ``True``, use the sqrt(atom mass) as a scaling factor to the displacement. - If ``False``, use the identity weights. 
If a N x 1 ``np.array` is provided, then - The concern is that light atoms (e.g., H) tend to have larger motions - than heavier atoms. + weights (bool or np.array): If ``True``, use the :math:`\\sqrt{\\text{atom mass}}` as a scaling factor to the displacement. + If ``False``, use the identity weights. If a :math:`N \\times 1` ``np.ndarray`` is provided, + then use the provided weights. The concern is that light atoms (e.g., H) + tend to have larger motions than heavier atoms. backend (str): The backend used to perceive xyz. Defaults to ``'openbabel'``. - multiplicity (int): The spin multiplicity of the transition states. Defaults to 1. + multiplicity (int): The spin multiplicity of the transition states. Defaults to ``1``. Returns: - list: a list of potential reactants - list: a list of potential products + list: Potential reactants + list: Potential products """ if isinstance(amplitude, float): amplitude = [amplitude] @@ -310,11 +312,11 @@ def examine_normal_mode(r_mol: RDKitMol, size of N x 3. disp (np.array): The displacement of the normal mode. It should have a size of N x 3. - amplitude (float): The amplitude of the motion. Defaults to 0.25. + amplitude (float): The amplitude of the motion. Defaults to ``0.25``. weights (bool or np.array): If ``True``, use the sqrt(atom mass) as a scaling factor to the displacement. - If ``False``, use the identity weights. If a N x 1 ``np.array` is provided, then - The concern is that light atoms (e.g., H) tend to have larger motions - than heavier atoms. + If ``False``, use the identity weights. If a N x 1 ``np.array`` is provided, then + use the provided weights. The concern is that light atoms (e.g., H) tend to have larger motions + than heavier atoms. verbose (bool): If print detailed information. Defaults to ``True``. as_factors (bool): If return the value of factors instead of a judgment.
Defaults to ``False`` From 244d1753163b0bbb698e6086d65dc903dd22632e Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 8 Sep 2023 00:38:36 -0400 Subject: [PATCH 10/21] Correct conformer_viewer docstring to remove errors Add code-block for demonstration of the default argument of style_spec --- rdmc/view.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/rdmc/view.py b/rdmc/view.py index 0b8127c4..8203527f 100644 --- a/rdmc/view.py +++ b/rdmc/view.py @@ -102,25 +102,30 @@ def conformer_viewer(mol: 'RDKitMol', Args: mol (RDKitMol): An RDKitMol object with embedded conformers. - conf_ids (list, optional): A list of conformer ids (int) to be overlaid and viewed. + conf_ids (list, optional): A list of conformer ids (as ``int``) to be overlaid and viewed. If not provided, all embedded conformers will be used. highlight_ids (list, optional): It is possible to highlight some of the conformers while greying out other conformers by providing the conformer IDs you want to highlight. opacity (float, optional): Set the opacity of the non-highlighted conformers and is only used with the highlighting feature. - the value should be a float number between 0 to 1. The default value is 0.5. - Values below 0.3 may be hard to see. - style_spec (dict, Optional): Style of the shown molecule. The default setting is - {'stick': {'radius': 0.05, 'color': '#f2f2f2'}, - 'sphere': {'scale': 0.25},} + the value should be a ``float`` between ``0.0`` to ``1.0``. The default value is ``0.5``. + Values below ``0.3`` may be hard to see. + style_spec (dict, Optional): Style of the shown molecule. The default setting is: + + .. code-block:: javascript + + {'stick': {'radius': 0.05, + 'color': '#f2f2f2'}, + 'sphere': {'scale': 0.25},} + which set both bond width/color and atom sizes. For more details, please refer to the - original APIs in 3DMol.js. + original APIs in `3DMol.js `_. 
viewer (py3Dmol.view, optional): Provide an existing viewer, instead of create a new one. viewer_size (tuple, optional): Set the viewer size. Only useful if ``viewer`` is not provided. - Defaults to (400, 400). - viewer_loc (tuple, optional): The location of the viewer in the grid. E.g., (0, 1). Defaults to None. + Defaults to ``(400, 400)``. + viewer_loc (tuple, optional): The location of the viewer in the grid. E.g., ``(0, 1)``. Only useful if ``viewer`` is not provided. Returns: - py3Dmol.view: The molecule viewer. + py3Dmol.view: The conformer viewer. """ if not viewer: viewer = py3Dmol.view(width=viewer_size[0], height=viewer_size[1]) From ecb6d509f25c35d8272d71f6dc9b0936f7551002 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 8 Sep 2023 00:40:13 -0400 Subject: [PATCH 11/21] Update the conf.py for docs configuration 1. Change the theme to pydata; 2. Change how Returns is displayed 3. Auto update year --- docs/source/conf.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index ca03f223..d7df6778 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,6 +10,7 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # +import datetime import os import sys sys.path.insert(0, os.path.abspath('../../')) @@ -18,8 +19,9 @@ # -- Project information ----------------------------------------------------- project = 'RDMC' -copyright = '2020, Xiaorui Dong' -author = 'Xiaorui Dong' +copyright = f'2020-{datetime.datetime.now().year}, Xiaorui Dong' +author = 'Xiaorui Dong, Lagnajit Pattanaik, Shih-Cheng Li, ' \ + 'Kevin Spiekermann, Hao-Wei Pang, and William H. Green' # The full version, including alpha/beta/rc tags release = '0.1' @@ -50,9 +52,21 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-# -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +html_theme = 'pydata_sphinx_theme' +html_theme_options = { + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/xiaoruidong/rdmc", + "icon": "fa-brands fa-square-github", + "type": "fontawesome", + }, + ] +} + +# Make the `Returns` in docstring behave like `Args` +napoleon_custom_sections = [('Returns', 'params_style')] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, From 117060b2971909dede7b4da15bf7097f8baf3327 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 8 Sep 2023 00:47:36 -0400 Subject: [PATCH 12/21] Add the API reference for mathlib module --- docs/source/reference/mathlib/curvefit.rst | 7 +++++++ docs/source/reference/mathlib/geom.rst | 7 +++++++ docs/source/reference/mathlib/greedymin.rst | 7 +++++++ docs/source/reference/mathlib/index.rst | 9 ++++++++- rdmc/mathlib/geom.py | 16 ++++++++-------- 5 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 docs/source/reference/mathlib/curvefit.rst create mode 100644 docs/source/reference/mathlib/geom.rst create mode 100644 docs/source/reference/mathlib/greedymin.rst diff --git a/docs/source/reference/mathlib/curvefit.rst b/docs/source/reference/mathlib/curvefit.rst new file mode 100644 index 00000000..1799de35 --- /dev/null +++ b/docs/source/reference/mathlib/curvefit.rst @@ -0,0 +1,7 @@ +rdmc.mathlib.curvefit +================================= + +.. automodule:: rdmc.mathlib.curvefit + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/mathlib/geom.rst b/docs/source/reference/mathlib/geom.rst new file mode 100644 index 00000000..2d9c631e --- /dev/null +++ b/docs/source/reference/mathlib/geom.rst @@ -0,0 +1,7 @@ +rdmc.mathlib.geom +================================= + +.. 
automodule:: rdmc.mathlib.geom + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/mathlib/greedymin.rst b/docs/source/reference/mathlib/greedymin.rst new file mode 100644 index 00000000..59a90a18 --- /dev/null +++ b/docs/source/reference/mathlib/greedymin.rst @@ -0,0 +1,7 @@ +rdmc.mathlib.greedymin +================================= + +.. automodule:: rdmc.mathlib.greedymin + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/mathlib/index.rst b/docs/source/reference/mathlib/index.rst index 71b26dea..b823a22b 100644 --- a/docs/source/reference/mathlib/index.rst +++ b/docs/source/reference/mathlib/index.rst @@ -4,4 +4,11 @@ rdmc.mathlib .. automodule:: rdmc.mathlib :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: + +.. toctree:: + :maxdepth: 4 + + geom + curvefit + greedymin diff --git a/rdmc/mathlib/geom.py b/rdmc/mathlib/geom.py index ab70cb9c..04bc0d8a 100644 --- a/rdmc/mathlib/geom.py +++ b/rdmc/mathlib/geom.py @@ -131,20 +131,20 @@ def rotate(coords: np.array, about: Optional[np.array] = None,): """ Rotate the coordinates according to the angles about the x, y, and z axes. The rotation is - about the origin, but their are a few options about choosing the 'about location'. + about the origin, but there are a few options about choosing the ``about`` location. Args: - coords (np.array): The 3D coordinates in numpy array with a size of N x 3. - angles (np.array): An array with a size of (1,3) indicate the rotation angles about the + coords (np.array): The 3D coordinates in numpy array with a size of :math:`N \\times 3`. + angles (np.array): An array with a size of ``(1,3)`` indicates the rotation angles about the x, y, and z axes, respectively. - degrees (bool): If the angles are defined as degrees. Defaults to False. + degrees (bool): If the angles are defined as degrees. Defaults to ``False``. 
about_center (bool): Whether to rotate the coordinates about their center. - Defaults to False. Note `about_center` cannot be assigned simultaneously with `about`. + Defaults to ``False``. Note ``about_center`` cannot be assigned simultaneously with ``about``. about (np.array): The coordinate that the rotation is about. Should be a vector with a length of 3. - It is defaults to `None`, and the coordinates is rotated about the origin. - `about` cannot be specified along with `about_center. + It defaults to ``None``, in which case the rotation is about the origin. + ``about`` cannot be specified along with ``about_center``. Returns: - np.array: An numpy array with the same size as the original coords. + np.array: coordinates after the rotation. """ if about_center and about is not None: raise ValueError('about and about_center cannot be specified simultaneously.') From 2abdc02675b9abebd02b3a3765c7f6c0ff37e462 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 11:49:43 -0400 Subject: [PATCH 13/21] Update index page and add cite/credits pages 1. Update the index with new introduction paragraph, installation guide, developer information 2. add a how to cite page 3. add a credits page 4.
add developer's social media info --- docs/source/_static/GitHub_icon.svg | 3 + docs/source/_static/Google_Scholar_icon.svg | 1 + docs/source/_static/LinkedIn_icon.svg | 1 + docs/source/_static/custom.css | 4 + docs/source/conf.py | 4 +- docs/source/index.rst | 105 ++++++++++++++++++-- docs/source/reference/cite.rst | 21 ++++ docs/source/reference/credits.rst | 57 +++++++++++ docs/source/{ => reference}/license.rst | 2 +- 9 files changed, 186 insertions(+), 12 deletions(-) create mode 100644 docs/source/_static/GitHub_icon.svg create mode 100644 docs/source/_static/Google_Scholar_icon.svg create mode 100644 docs/source/_static/LinkedIn_icon.svg create mode 100644 docs/source/_static/custom.css create mode 100644 docs/source/reference/cite.rst create mode 100644 docs/source/reference/credits.rst rename docs/source/{ => reference}/license.rst (70%) diff --git a/docs/source/_static/GitHub_icon.svg b/docs/source/_static/GitHub_icon.svg new file mode 100644 index 00000000..a8d11740 --- /dev/null +++ b/docs/source/_static/GitHub_icon.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/source/_static/Google_Scholar_icon.svg b/docs/source/_static/Google_Scholar_icon.svg new file mode 100644 index 00000000..77538309 --- /dev/null +++ b/docs/source/_static/Google_Scholar_icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/source/_static/LinkedIn_icon.svg b/docs/source/_static/LinkedIn_icon.svg new file mode 100644 index 00000000..46087a0f --- /dev/null +++ b/docs/source/_static/LinkedIn_icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 00000000..2319ae72 --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,4 @@ +.social-icon { + height: 1rem; + width: auto; +} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index d7df6778..4cecd23e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,7 +14,6 @@ import 
os import sys sys.path.insert(0, os.path.abspath('../../')) -import sphinx_rtd_theme # -- Project information ----------------------------------------------------- @@ -72,6 +71,9 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +html_css_files = [ + 'custom.css', +] # -- Packages that are not importable from the default environment --------- diff --git a/docs/source/index.rst b/docs/source/index.rst index a1e604c2..16debffc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,17 +1,57 @@ RDMC Documentation =================== -**RDMC (Reaction Data and Molecule Conformer)** is A light-weight wrapper for **RDKit** Molecule and Conformer related operations. RDkit is great! I like its versatility and speed, but -personally, I find learning to use RDKit is not easy. I usually have to go back and forth to check if RDKit has certain methods and how to use them, -since those molecule operations are usually located in different modules. I wrote this tiny thing majorly aiming to make my life easier and to -provide a convenient tool so that users can just import a single module / class instead of remembering which method in what module. +**RDMC (Reaction Data and Molecular Conformer)** is an open-source lightweight software package specialized in handling Reaction Data and Molecular (including transition states) Conformers. -To start with, simply try:: +It contains various modules and classes (e.g., ``RDKitMol``, ``Reaction``, ``view``) helpful for relevant tasks to make conversion, visualization, manipulation, and analysis of molecules easier. +It also provides solutions to pipelining tasks to achieve high-throughput generating and processing of large amount of molecule/reaction data. 
It is written in Python and has dependencies only +on popular packages (i.e., ``numpy``, ``scipy``, ``matplotlib``, ``rdkit``, ``openbabel``, ``py3dmol``, ``ase``, ``networkx``, ``cclib``), and you can easily incorporate it into your own Python scripts. - from rdmc.mol import RDKitMol +The source code of the RDMC software package is hosted on GitHub, and its binary distribution is available on Anaconda Cloud. The easiest way to install RDMC is to use ``conda`` or ``mamba``:: -and see what you can do with this ``RDKitMol`` class! + conda install -c xiaoruidong rdmc +Or + +.. code-block:: bash + + mamba install -c xiaoruidong rdmc + +``conda`` can be installed via `Anaconda `_, and ``mamba`` can be installed via `Mambaforge `_. + +You can also install RDMC from the source code: + +.. code-block:: bash + + git clone https://github.com/xiaoruidong/rdmc + cd rdmc + conda env create -f environment.yml + conda activate rdmc + python setup.py install + +To start with, simply try: + +.. code-block:: python + + from rdmc import RDKitMol, Reaction + mol = RDKitMol('CCO') + rxn = Reaction('CCO>>CC(=O)O') + +And see what the ``mol`` and ``rxn`` are capable of! The full lists of APIs of :obj:`RDKitMol ` and :obj:`Reaction ` are provided in this documentation. + +We also provide a few notebooks (available in ``ipython/`` and `Colab `_) to demonstrate the usage of RDMC. Please feel invited to try them out! + +RDMC is developed by + +- Xiaorui Dong (|github_xiaorui|_ \| |linkedin_xiaorui|_ \| |gs_xiaorui|_), +- Dr. Lagnajit Pattanaik (|github_lucky|_ \| |linkedin_lucky|_ \| |gs_lucky|_), +- Dr. Shih-Cheng Li (|github_shihcheng|_ \| |linkedin_shihcheng|_ \| |gs_shihcheng|_), +- Dr. Kevin Spiekermann (|github_kevin|_ \| |linkedin_kevin|_ \| |gs_kevin|_), +- Hao-Wei Pang (|github_haowei|_ \| |linkedin_haowei|_ \| |gs_haowei|_), +- Prof. William H. Green (|linkedin_bill|_ \| |gs_bill|_) + +at `Green Research Group `_ at `Massachusetts Institute of Technology (MIT) `_.
+For any questions while using RDMC, please contact us via the `GitHub issue page `_ or email us at `rdmc_dev@mit.edu `_. Contents ======== @@ -19,11 +59,56 @@ Contents :maxdepth: 2 reference/rdmc - license + reference/credits + reference/cite + reference/license -Indices and tables +APIs =================== * :ref:`genindex` * :ref:`modindex` -* :ref:`search` + + +.. |github| image:: _static/GitHub_icon.svg + :class: social-icon +.. |github_xiaorui| replace:: |github| +.. _github_xiaorui: https://github.com/xiaoruiDong +.. |github_lucky| replace:: |github| +.. _github_lucky: https://github.com/PattanaikL +.. |github_shihcheng| replace:: |github| +.. _github_shihcheng: https://github.com/shihchengli +.. |github_kevin| replace:: |github| +.. _github_kevin: https://github.com/kspieks +.. |github_haowei| replace:: |github| +.. _github_haowei: https://github.com/hwpang + +.. |linkedin| image:: _static/LinkedIn_icon.svg + :class: social-icon +.. |linkedin_xiaorui| replace:: |linkedin| +.. _linkedin_xiaorui: https://www.linkedin.com/in/xiaorui-dong/ +.. |linkedin_lucky| replace:: |linkedin| +.. _linkedin_lucky: https://www.linkedin.com/in/lagnajit-pattanaik-94a564108/ +.. |linkedin_shihcheng| replace:: |linkedin| +.. _linkedin_shihcheng: https://www.linkedin.com/in/shih-cheng-li-564006207/ +.. |linkedin_kevin| replace:: |linkedin| +.. _linkedin_kevin: https://www.linkedin.com/in/kspiekermann/ +.. |linkedin_haowei| replace:: |linkedin| +.. _linkedin_haowei: https://www.linkedin.com/in/hao-wei-pang/ +.. |linkedin_bill| replace:: |linkedin| +.. _linkedin_bill: https://www.linkedin.com/in/william-green-63a9a218/ + +.. |google_scholar| image:: _static/Google_Scholar_icon.svg + :class: social-icon +.. |gs_xiaorui| replace:: |google_scholar| +.. _gs_xiaorui: https://scholar.google.com/citations?hl=en&user=r5Wz41EAAAAJ +.. |gs_lucky| replace:: |google_scholar| +.. _gs_lucky: https://scholar.google.com/citations?hl=en&user=bVT6lpwAAAAJ +.. 
|gs_shihcheng| replace:: |google_scholar| +.. _gs_shihcheng: https://scholar.google.com/citations?hl=en&user=kc_rvjoAAAAJ +.. |gs_kevin| replace:: |google_scholar| +.. _gs_kevin: https://scholar.google.com/citations?hl=en&user=qg2LmbgAAAAJ +.. |gs_haowei| replace:: |google_scholar| +.. _gs_haowei: https://scholar.google.com/citations?hl=en&user=hmkEmtcAAAAJ +.. |gs_bill| replace:: |google_scholar| +.. _gs_bill: https://scholar.google.com/citations?hl=en&user=PGQTLWwAAAAJ diff --git a/docs/source/reference/cite.rst b/docs/source/reference/cite.rst new file mode 100644 index 00000000..21f1962d --- /dev/null +++ b/docs/source/reference/cite.rst @@ -0,0 +1,21 @@ +How to cite RDMC +================ + +Text form + +.. code-block:: tex + + Dong, X., Pattanaik, L., Li, S.-C., Spiekermann, K., Pang, H.-W., Green, William H (2023). RDMC: Reaction Data and Molecular Conformer (Version 0.1.0) [Computer software]. https://github.com/xiaoruiDong/RDMC + + +BibTeX form + +.. code-block:: tex + + @misc{RDMC, + author = {Dong, Xiaorui and Pattanaik, Lagnajit and Li, Shih-Cheng and Spiekermann, Kevin and Pang, Hao-Wei and Green, William H.}, + title = {RDMC: Reaction Data and Molecular Conformer Software Package, version 0.1.0}, + year = {2023}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/xiaoruiDong/RDMC}}} + diff --git a/docs/source/reference/credits.rst b/docs/source/reference/credits.rst new file mode 100644 index 00000000..632b3040 --- /dev/null +++ b/docs/source/reference/credits.rst @@ -0,0 +1,57 @@ +Credits +======== + +**RDMC** is developed at the `Green Research Group `_ at MIT. + +The main contributors are: + +- Xiaorui Dong (|github_xiaorui|_ \| |linkedin_xiaorui|_ \| |gs_xiaorui|_), +- Dr. Lagnajit Pattanaik (|github_lucky|_ \| |linkedin_lucky|_ \| |gs_lucky|_), +- Dr. Shih-Cheng Li (|github_shihcheng|_ \| |linkedin_shihcheng|_ \| |gs_shihcheng|_), +- Dr. 
Kevin Spiekermann (|github_kevin|_ \| |linkedin_kevin|_ \| |gs_kevin|_), +- Hao-Wei Pang (|github_haowei|_ \| |linkedin_haowei|_ \| |gs_haowei|_), +- Prof. William H. Green (|linkedin_bill|_ \| |gs_bill|_) + + +.. |github| image:: ../_static/GitHub_icon.svg + :class: social-icon +.. |github_xiaorui| replace:: |github| +.. _github_xiaorui: https://github.com/xiaoruiDong +.. |github_lucky| replace:: |github| +.. _github_lucky: https://github.com/PattanaikL +.. |github_shihcheng| replace:: |github| +.. _github_shihcheng: https://github.com/shihchengli +.. |github_kevin| replace:: |github| +.. _github_kevin: https://github.com/kspieks +.. |github_haowei| replace:: |github| +.. _github_haowei: https://github.com/hwpang + +.. |linkedin| image:: ../_static/LinkedIn_icon.svg + :class: social-icon +.. |linkedin_xiaorui| replace:: |linkedin| +.. _linkedin_xiaorui: https://www.linkedin.com/in/xiaorui-dong/ +.. |linkedin_lucky| replace:: |linkedin| +.. _linkedin_lucky: https://www.linkedin.com/in/lagnajit-pattanaik-94a564108/ +.. |linkedin_shihcheng| replace:: |linkedin| +.. _linkedin_shihcheng: https://www.linkedin.com/in/shih-cheng-li-564006207/ +.. |linkedin_kevin| replace:: |linkedin| +.. _linkedin_kevin: https://www.linkedin.com/in/kspiekermann/ +.. |linkedin_haowei| replace:: |linkedin| +.. _linkedin_haowei: https://www.linkedin.com/in/hao-wei-pang/ +.. |linkedin_bill| replace:: |linkedin| +.. _linkedin_bill: https://www.linkedin.com/in/william-green-63a9a218/ + +.. |google_scholar| image:: ../_static/Google_Scholar_icon.svg + :class: social-icon +.. |gs_xiaorui| replace:: |google_scholar| +.. _gs_xiaorui: https://scholar.google.com/citations?hl=en&user=r5Wz41EAAAAJ +.. |gs_lucky| replace:: |google_scholar| +.. _gs_lucky: https://scholar.google.com/citations?hl=en&user=bVT6lpwAAAAJ +.. |gs_shihcheng| replace:: |google_scholar| +.. _gs_shihcheng: https://scholar.google.com/citations?hl=en&user=kc_rvjoAAAAJ +.. |gs_kevin| replace:: |google_scholar| +.. 
_gs_kevin: https://scholar.google.com/citations?hl=en&user=qg2LmbgAAAAJ +.. |gs_haowei| replace:: |google_scholar| +.. _gs_haowei: https://scholar.google.com/citations?hl=en&user=hmkEmtcAAAAJ +.. |gs_bill| replace:: |google_scholar| +.. _gs_bill: https://scholar.google.com/citations?hl=en&user=PGQTLWwAAAAJ diff --git a/docs/source/license.rst b/docs/source/reference/license.rst similarity index 70% rename from docs/source/license.rst rename to docs/source/reference/license.rst index 6c338fae..17b86c52 100644 --- a/docs/source/license.rst +++ b/docs/source/reference/license.rst @@ -5,4 +5,4 @@ Licence RDMC is distributed free of charge under the MIT license. - .. include:: ../../LICENSE.md + .. include:: ../../../LICENSE.md From 3ba772543debc087b47f959b9943e29b126b511b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 12:15:08 -0400 Subject: [PATCH 14/21] Correct docstrings in mol.py --- rdmc/mol.py | 346 +++++++++++++++++++++++++++++----------------------- 1 file changed, 196 insertions(+), 150 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index 9387aa47..92176fd0 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -45,17 +45,17 @@ class RDKitMol(object): """ - A helpful wrapper for rdchem.Mol. + A helpful wrapper for ``Chem.rdchem.RWMol``. The method nomenclature follows the Camel style to be consistent with RDKit. - It keeps almost all of the orignal method of Chem.rdchem.Mol/RWMol, but add few useful - shortcuts, so that a user doesn't need to refer to other RDKit modules. + It keeps almost all of the original methods of ``Chem.rdchem.RWMol`` but has a few useful + shortcuts so that users don't need to refer to other RDKit modules. """ def __init__(self, mol: Union[Mol, RWMol], keepAtomMap: bool = True): """ - Generate an RDKitMol Molecule instance from a RDKit ``Chem.rdchem.Mol`` or ``RWMol`` molecule. + Generate an ``RDKitMol`` molecule object instance from a RDKit ``Chem.rdchem.Mol`` or ``RWMol`` molecule. 
Args: mol (Union[Mol, RWMol]): The RDKit ``Chem.rdchem.Mol`` / ``RWmol`` molecule to be converted. @@ -93,12 +93,14 @@ def __init__(self, def AddNullConformer(self, confId: Optional[int] = None, - random: bool = True): + random: bool = True, + ) -> None: """ - Embed null conformer to existing RDKit mol. + Embed a conformer with atoms' coordinates of random numbers or with all atoms + located at the origin to the current ``RDKitMol``. Args: - confId (int, optional): Which ID to set for the conformer (will add as last conformer by default). + confId (int, optional): Which ID to set for the conformer (will be added as the last conformer by default). random (bool, optional): Whether set coordinates to random numbers. Otherwise, set to all-zero coordinates. Defaults to ``True``. """ @@ -111,14 +113,16 @@ def AddNullConformer(self, conf.SetId(confId) self._mol.AddConformer(conf) - def AddRedundantBonds(self, bonds: Iterable): + def AddRedundantBonds(self, + bonds: Iterable, + ) -> 'RDKitMol': """ Add redundant bonds (not originally exist in the molecule) for - the convenience for some molecule operation or analysis. This function + facilitating a few molecule operations or analyses. This function will only generate a copy of the molecule and no change is conducted inplace. Args: - bonds: a list of length-2 Iterables containing the index of the ended atoms. + bonds: a list of length-2 Iterables containing the indexes of the ended atoms. """ mol_cp = self.Copy() for bond in bonds: @@ -138,8 +142,8 @@ def AlignMol(self, ) -> float: """ Align molecules based on a reference molecule. This function will also return the RMSD value for the best alignment. - When leaving both ``prbMol`` and ``refMol`` blank, then the function will match the current molecules' conformers, and - PrbCid or refCid must be provided. + When leaving both ``prbMol`` and ``refMol`` blank, the function will align current molecule's conformers, and + ``prbCid`` or ``refCid`` must be provided.
Args: refMol (Mol): RDKit molecule as a reference. Should not be provided with ``prbMol``. @@ -148,10 +152,10 @@ def AlignMol(self, refCid (int, optional): The id of reference conformer. Defaults to ``0``. reflect (bool, optional): Whether to reflect the conformation of the probe molecule. Defaults to ``False``. - atomMap (list, optional): a vector of pairs of atom IDs ``(probe AtomId, ref AtomId)`` + atomMap (list, optional): A vector of pairs of atom IDs ``(prb AtomId, ref AtomId)`` used to compute the alignments. If this mapping is not - specified an attempt is made to generate on by substructure matching. - maxIters (int, optional): maximum number of iterations used in mimizing the RMSD. Defaults to ``1000``. + specified, an attempt is made to generate one by substructure matching. + maxIters (int, optional): Maximum number of iterations used in minimizing the RMSD. Defaults to ``1000``. Returns: float: RMSD value. @@ -193,20 +197,23 @@ def CalcRMSD(self, reflect: bool = False, atomMaps: Optional[list] = None, weights: list = [], - ): + ) -> float: """ - Calculate the RMSD for between conformers of two molecules. Note this function will not align molecule, thus molecules geometries - in the calculation are not translated or rotated. You can expect a larger number compared to the RMSD from AlignMol. + Calculate the RMSD between conformers of two molecules. Note this function will not align conformers, thus molecules' geometries + are not translated or rotated during the calculation. You can expect a larger number compared to the RMSD from :func:`~RDKitMol.AlignMol`. Args: - prbMol ('RDKitMol'): The other molecule to compare with. This can be the instance as the current molecule. - prbCid (int, optional): The conformer ID of the current molecule to calculate RMSD. Defaults to 0. - refCid (int, optional): The conformer ID of the other molecule to calculate RMSD. Defaults to 0. - reflect (bool, optional): Whether to reflect the conformation of the prbMol.
Defaults to ``False``. - atomMaps (list, optional): Provide an atom mapping to calculate the RMSD. By default, prbMol and current molecule - will be assumed to have the same atom order. + prbMol (RDKitMol): The other molecule to compare with. It can be set to the current molecule. + prbCid (int, optional): The conformer ID of the current molecule to calculate RMSD. Defaults to ``0``. + refCid (int, optional): The conformer ID of the other molecule to calculate RMSD. Defaults to ``0``. + reflect (bool, optional): Whether to reflect the conformation of the ``prbMol``. Defaults to ``False``. + atomMaps (list, optional): Provide an atom mapping to calculate the RMSD. By default, ``prbMol`` and current molecule + are assumed to have the same atom order. weights (list, optional): Specify weights to each atom pairs. E.g., use atom weights to highlight the importance of - heavy atoms. + heavy atoms. Defaults to ``[]`` for using unity weights. + + Returns: + float: RMSD value. """ if atomMaps is None: atomMaps = [list(enumerate(range(self.GetNumAtoms())))] @@ -230,7 +237,10 @@ def CalcRMSD(self, def AssignStereochemistryFrom3D(self, confId: int = 0): """ - Assign the chiraltype to a molecule's atoms. + Assign the chirality type to a molecule's atoms. + + Args: + confId (int, optional): The ID of the conformer whose geometry is used to determine the chirality. Defaults to ``0``. """ Chem.rdmolops.AssignStereochemistryFrom3D(self._mol, confId=confId) @@ -240,25 +250,25 @@ def CombineMol(self, c_product: bool = False, ) -> 'RDKitMol': """ - A function to combine the current molecule with the given ``molFrag`` (another molecule - or fragment). It will return a new RDKitMol instance without changing the input molecules. + Combine the current molecule with the given ``molFrag`` (another molecule + or fragment). A new object instance will be created and changes are not made to the current molecule. 
Args: - molFrag (RDKitMol, Mol): the molecule or fragment to be combined into the current one. + molFrag (RDKitMol or Mol): The molecule or fragment to be combined into the current one. offset: - - (list, tuple): A 3D vector used to define the offset. - - (float): Distance in Angstrom between the current mol and the `molFrag` along the x axis. - c_product (bool, optional): If `True`, generate conformers for every possible combination - between the current molecule and the `molFrag`. E.g., - (1,1), (1,2), ... (1,n), (2,1), ...(m,1), ... (m,n) - Defaults to `False`, meaning it generates conformers pairwise. E.g., - (1,1), (2,2), ... - When `c_product=True`, N(conformer) = m x n. - When `c_product=False`, if the current molecule has 0 conformer, N(conformer) - will be equal to the N(conformer) of `molFrag`; Otherwise, N(conformer) - will be equal to the N(conformer) of the current molecule object. Some - coordinates may be filled by zeros if the current molecule and `molFrag` - have different N(conformer). + - (list or tuple): A 3-element vector used to define the offset. + - (float): Distance in Angstrom between the current mol and the ``molFrag`` along the x axis. + c_product (bool, optional): If ``True``, generate conformers for every possible combination + between the current molecule and the ``molFrag``. E.g., + (1,1), (1,2), ... (1,n), (2,1), ...(m,1), ... (m,n). :math:`N(conformer) = m \\times n.` + + Defaults to ``False``, meaning only generate conformers according to + (1,1), (2,2), ... When ``c_product`` is set to ``False``, if the current + molecule has 0 conformer, conformers will be embedded to the current molecule first. + The number of conformers of the combined molecule will be equal to the number of conformers + of ``molFrag``. Otherwise, the number of conformers of the combined molecule will be equal + to the number of conformers of the current molecule. 
Some coordinates may be filled by 0s, + if the current molecule and ``molFrag`` have different numbers of conformers. Returns: RDKitMol: The combined molecule. @@ -301,12 +311,12 @@ def Copy(self, copy_attrs: Optional[list] = None, ) -> 'RDKitMol': """ - Make a copy of the RDKitMol. + Make a copy of the current ``RDKitMol``. Args: - quickCopy (bool, optional): Use the quick copy mode without copying conformers. Defaults to False. - confId (int, optional): The conformer ID to be copied. Defaults to -1, meaning all conformers. - copy_attrs (list, optional): copy specific attributes to the new mol + quickCopy (bool, optional): Use the quick copy mode without copying conformers. Defaults to ``False``. + confId (int, optional): The conformer ID to be copied. Defaults to ``-1``, meaning all conformers. + copy_attrs (list, optional): Copy specific attributes to the new molecule. Defaults to ``None``. Returns: RDKitMol: a copied molecule @@ -322,15 +332,15 @@ def EmbedConformer(self, **kwargs): """ Embed a conformer to the ``RDKitMol``. This will overwrite current conformers. By default, it - will first try embeding a 3D conformer; if failed, it will then try to compute 2D coordinates - and use that for the conformer structure; if both approaches fail and allow embedding a null - conformer, then a conformer with all zero coordinates will be embedded. The last one is still - helpful if you have the coordinates, so you can use `SetPositions` to set them, or if you want to - optimize the geometry using things like forcefields. + will first try embedding a 3D conformer; if that fails, it then tries to compute 2D coordinates + and use that for the conformer structure; if both approaches fail, and embedding a null + conformer is allowed, a conformer with all zero coordinates will be embedded. The last one is + helpful for the case where you can use `SetPositions` to set their positions afterward, or if you want to + optimize the geometry using force fields.
Args: - embed_null (bool): If Embed 3D and 2D coordinates fails, whether to embed a conformer - with all null coordinates: (0, 0, 0) for each atom. Defaults to ``True``. + embed_null (bool): If embedding 3D and 2D coordinates fails, whether to embed a conformer + with all null coordinates, ``(0, 0, 0)``, for each atom. Defaults to ``True``. """ try: return_code = AllChem.EmbedMolecule(self._mol, **kwargs) @@ -352,12 +362,12 @@ def EmbedMultipleConfs(self, embed_null: bool = True, **kwargs): """ - Embed conformers to the ``RDKitMol``. This will overwrite current conformers. By default, it - will first try embeding 3D conformers; if failed, it will then try to compute 2D coordinates - and use that for the conformer structures; if both approaches fail and allow embedding null - conformers, then conformers with all zero coordinates will be embedded. The last one is still - helpful if you have the coordinates, so you can use `SetPositions` to set them, or you want to - optimize the geometry using things like forcefields. + Embed multiple conformers to the ``RDKitMol``. This will overwrite current conformers. By default, it + will first try embedding 3D conformers; if that fails, it then tries to compute 2D coordinates + and use that for the conformer structures; if both approaches fail, and embedding null + conformers is allowed, conformers with all zero coordinates will be embedded. The last one is + helpful for the case where you can use `SetPositions` to set their positions afterward, or if you want to + optimize the geometry using force fields. Args: n (int): The number of conformers to be embedded. The default is ``1``. @@ -377,10 +387,10 @@ def EmbedMultipleConfs(self, def EmbedNullConformer(self, random: bool = True): """ - Embed a conformer with meaningless atom coordinates. This helps the cases where a conformer - can not be successfully embeded. You can choose to generate all zero coordinates or random coordinates.
+ Embed a conformer with null or random atom coordinates. This helps the cases where a conformer + can not be successfully embedded. You can choose to generate all zero coordinates or random coordinates. You can set to all-zero coordinates, if you will set coordinates later; You should set to random - coordinates, if you want to optimize this molecule by forcefield (RDKit force field cannot optimize + coordinates, if you want to optimize this molecule by force fields (RDKit force field cannot optimize all-zero coordinates). Args: @@ -393,10 +403,10 @@ def EmbedMultipleNullConfs(self, n: int = 10, random: bool = True): """ - Embed conformers with meaningless atom coordinates. This helps the cases where a conformer + Embed conformers with null or random atom coordinates. This helps the cases where a conformer can not be successfully embedded. You can choose to generate all zero coordinates or random coordinates. You can set to all-zero coordinates, if you will set coordinates later; You should set to random - coordinates, if you want to optimize this molecule by forcefield (RDKit force field cannot optimize + coordinates, if you want to optimize this molecule by force fields (RDKit force field cannot optimize all-zero coordinates). Args: @@ -445,12 +455,12 @@ def FromMol(cls, Args: rdmol (Union[Mol, RWMol]): The RDKit ``Chem.rdchem.Mol`` / ``RWMol`` molecule to be converted. - keepAtomMap (bool, optional): Whether keep the original atom mapping. Defaults to True. + keepAtomMap (bool, optional): Whether keep the original atom mapping. Defaults to ``True``. If no atom mapping is stored in the molecule, atom mapping will be created based on atom indexes. Returns: - RDKitMol: An RDKitMol molecule. + RDKitMol: RDKitMol molecule converted from the input RDKit ``Chem.rdchem.Mol`` molecule. """ return cls(mol, keepAtomMap=keepAtomMap) @@ -464,7 +474,7 @@ def FromSmiles(cls, keepAtomMap: bool = True, ) -> 'RDKitMol': """ - Convert a SMILES to an ``RDkitMol`` object. 
+ Convert a SMILES string to an ``RDKitMol`` object. Args: smiles (str): A SMILES representation of the molecule. @@ -512,12 +522,16 @@ def FromSmiles(cls, @classmethod def FromSmarts(cls, - smarts): + smarts: str, + ) -> 'RDKitMol': """ Convert a SMARTS to an ``RDKitMol`` object. Args: smarts (str): A SMARTS string of the molecule + + Returns: + RDKitMol: An RDKit molecule object corresponding to the SMARTS. """ mol = Chem.MolFromSmarts(smarts) return cls(mol) @@ -530,14 +544,14 @@ def FromInchi(cls, sanitize: bool = True, ): """ - Construct a molecule from a InChI string + Construct an ``RDKitMol`` object from an InChI string. Args: - inchi (str): a InChI string. https://en.wikipedia.org/wiki/International_Chemical_Identifier + inchi (str): An InChI string. https://en.wikipedia.org/wiki/International_Chemical_Identifier removeHs (bool, optional): Whether to remove hydrogen atoms from the molecule, Due to RDKit implementation, only effective when sanitize is ``True`` as well. ``True`` to remove. addHs (bool, optional): Whether to add explicit hydrogen atoms to the molecule. ``True`` to add. - Only functioning when removeHs is False. + Only functioning when ``removeHs`` is ``False``. sanitize (bool, optional): Whether to sanitize the RDKit molecule, ``True`` to sanitize. Returns: @@ -585,20 +599,21 @@ def FromXYZ(cls, Args: xyz (str): A XYZ String. - backend (str): The backend used to perceive molecule. Defaults to ``openbabel``. - Currently, we only support ``openbabel`` and ``jensen``. + backend (str): The backend used to perceive molecule. Defaults to ``'openbabel'``. + Currently, we only support ``'openbabel'`` and ``'jensen'``. header (bool, optional): If lines of the number of atoms and title are included. Defaults to ``True.`` - sanitize (bool): Sanitize the RDKit mol created using openbabel or not. Helpful to set this to False - when reading in TSs. Defaults to ``True.`` - embed_chiral: ``True`` to embed chiral information. Defaults to True.
+ correctCO (bool, optional): Whether to correct the CO bond as "[C-]#[O+]". Defaults to ``True``. + sanitize (bool): Sanitize the RDKit molecule during conversion. Helpful to set it to ``False`` + when reading in TSs. Defaults to ``True``. + embed_chiral: ``True`` to embed chiral information. Defaults to ``True``. supported kwargs: jensen: - charge: The charge of the species. Defaults to ``0``. - - allow_charged_fragments: ``True`` for charged fragment, ``False`` for radical. Defaults to False. - - use_graph: ``True`` to use networkx module for accelerate. Defaults to True. - - use_huckel: ``True`` to use extended Huckel bond orders to locate bonds. Defaults to False. - - forced_rdmc: Defaults to False. In rare case, we may hope to use a tailored + - allow_charged_fragments: ``True`` for charged fragment, ``False`` for radical. Defaults to ``False``. + - use_graph: ``True`` to use networkx module for accelerate. Defaults to ``True``. + - use_huckel: ``True`` to use extended Huckel bond orders to locate bonds. Defaults to ``False``. + - forced_rdmc: Defaults to ``False``. In rare case, we may hope to use a tailored version of the Jensen XYZ parser, other than the one available in RDKit. Set this argument to ``True`` to force use RDMC's implementation, which user's may have some flexibility to modify. @@ -637,16 +652,16 @@ def FromSDF(cls, sanitize: bool = True, ) -> 'RDKitMol': """ - Convert xyz string to RDKitMol. + Convert an SDF string to RDKitMol. Args: sdf (str): An SDF string. - removeHs (bool): Whether or not to remove hydrogens from the input (defaults to False) + removeHs (bool): Whether or not to remove hydrogens from the input. Defaults to ``False``. sanitize (bool): Whether or not to use RDKit's sanitization algorithm to clean input; helpful to set this - to False when reading TS files (defaults to True) + to ``False`` when reading TS files. Defaults to ``True``. Returns: - RDKitMol: An RDKit molecule object corresponding to the sdf. 
+ RDKitMol: An RDKit molecule object corresponding to the SDF string. """ mol = Chem.MolFromMolBlock(sdf, removeHs=removeHs, sanitize=sanitize) return cls(mol) @@ -663,19 +678,19 @@ def FromFile(cls, **kwargs ) -> 'RDKitMol': """ - Read RDKitMol from file. + Read RDKitMol from a file. Args: path (str): File path to data. - backend (str): The backend used to perceive molecule. Defaults to ``openbabel``. - Currently, we only support ``openbabel`` and ``jensen``. + backend (str, optional): The backend used to perceive molecule. Defaults to ``'openbabel'``. + Currently, we only support ``'openbabel'`` and ``'jensen'``. header (bool, optional): If lines of the number of atoms and title are included. Defaults to ``True.`` - removeHs (bool): Whether or not to remove hydrogens from the input (defaults to False) + removeHs (bool): Whether or not to remove hydrogens from the input. Defaults to ``False``. sanitize (bool): Whether or not to use RDKit's sanitization algorithm to clean input; helpful to set this - to False when reading TS files (defaults to True) + to ``False`` when reading TS files. Defaults to ``True``. sameMol (bool): Whether or not all the conformers in the (sdf) file are for the same mol, in which case - we will copy conformers directly to the mol (defaults to False) + we will copy conformers directly to the mol. Defaults to ``False``. Returns: RDKitMol: An RDKit molecule object corresponding to the file. @@ -709,8 +724,8 @@ def GetAdjacencyMatrix(self): Get the adjacency matrix of the molecule. Returns: - numpy.ndarray: A square adjacency matrix of the molecule, where a "1" indicates that atoms are bonded - and a "0" indicates that atoms aren't bonded + numpy.ndarray: A square adjacency matrix of the molecule, where a `1` indicates that atoms are bonded + and a `0` indicates that atoms aren't bonded. 
""" return Chem.GetAdjacencyMatrix(self._mol) @@ -731,7 +746,7 @@ def GetBestAlign(self, maxIters: int = 1000, keepBestConformer: bool = True): """ - This is a wrapper function for calling `AlignMol` twice, with ``reflect`` to ``True`` + This is a wrapper function for calling ``AlignMol`` twice, with ``reflect`` set to ``True`` and ``False``, respectively. Args: @@ -743,7 +758,7 @@ def GetBestAlign(self, atomMap (list, optional): a vector of pairs of atom IDs ``(probe AtomId, ref AtomId)`` used to compute the alignments. If this mapping is not specified an attempt is made to generate on by substructure matching. - maxIters (int, optional): maximum number of iterations used in mimizing the RMSD. Defaults to ``1000``. + maxIters (int, optional): maximum number of iterations used in minimizing the RMSD. Defaults to ``1000``. keepBestConformer (bool, optional): Whether to keep the best Conformer structure. Defaults to ``True``. This is less helpful when you are comparing different atom mappings. @@ -784,14 +799,16 @@ def GetBestAlign(self, return rmsd, reflect - def GetBondsAsTuples(self): + def GetBondsAsTuples(self) -> List[tuple]: """ - Generate a list of length-2 sets indicating the bonding atoms - in the molecule + Generate a list of length-2 tuples indicating the bonding atoms in the molecule. + + Returns: + list: A list of length-2 tuples indicating the bonding atoms. """ return [tuple(sorted((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))) for bond in self.GetBonds()] - def GetElementSymbols(self): + def GetElementSymbols(self) -> List[str]: """ Get the element symbols of the molecules. The element symbols are sorted by the atom indexes. @@ -800,7 +817,7 @@ def GetElementSymbols(self): """ return get_element_symbols(self.GetAtomicNumbers()) - def GetAtomMasses(self): + def GetAtomMasses(self) -> List[float]: """ Get the mass of each atom. The order is consistent with the atom indexes.
@@ -868,6 +885,16 @@ def GetAllConformers(self) -> List['RDKitConf']: return self.GetConformers(list(range(self.GetNumConformers()))) def GetDistanceMatrix(self, id: int = 0) -> np.ndarray: + """ + Get the distance matrix of the molecule. + + Args: + id (int, optional): The conformer ID to extract distance matrix from. + Defaults to ``0``. + + Returns: + np.ndarray: A square distance matrix of the molecule. + """ return Chem.rdmolops.Get3DDistanceMatrix(self._mol, confId=id) def GetPositions(self, id: int = 0) -> np.ndarray: @@ -884,12 +911,12 @@ def GetPositions(self, id: int = 0) -> np.ndarray: conf = self.GetConformer(id=id) return conf.GetPositions() - def GetSymmSSSR(self): + def GetSymmSSSR(self) -> tuple: """ Get a symmetrized SSSR for a molecule. Returns: - tuple: a sequence of sequences containing the rings found as atom ids + tuple: A sequence of sequences containing the rings found as atom IDs. """ return Chem.GetSymmSSSR(self._mol) @@ -902,9 +929,9 @@ def GetSubstructMatch(self, Returns the indices of the molecule's atoms that match a substructure query. Args: - query (Mol): a RDkit Molecule. - useChirality (bool, optional): enables the use of stereochemistry in the matching. Defaults to ``False``. - useQueryQueryMatches (bool, optional): use query-query matching logic. Defaults to ``False``. + query (Mol): An RDkit Molecule. + useChirality (bool, optional): Enables the use of stereochemistry in the matching. Defaults to ``False``. + useQueryQueryMatches (bool, optional): Use query-query matching logic. Defaults to ``False``. Returns: tuple: A tuple of matched indices. @@ -926,10 +953,11 @@ def GetSubstructMatches(self, Args: query (Mol): a Molecule. - uniquify (bool, optional): determines whether or not the matches are uniquified. Defaults to True. - useChirality (bool, optional): enables the use of stereochemistry in the matching. Defaults to False. - useQueryQueryMatches (bool, optional): use query-query matching logic. Defaults to False. 
+ uniquify (bool, optional): determines whether or not the matches are uniquified. Defaults to ``True``. + useChirality (bool, optional): enables the use of stereochemistry in the matching. Defaults to ``False``. + useQueryQueryMatches (bool, optional): use query-query matching logic. Defaults to ``False``. maxMatches: The maximum number of matches that will be returned to prevent a combinatorial explosion. + Defaults to ``1000``. Returns: tuple: A tuple of tuples of matched indices. @@ -948,20 +976,20 @@ def GetMolFrags(self, fragsMolAtomMapping: Optional[list] = None, ) -> tuple: """ - Finds the disconnected fragments from a molecule. For example, for the molecule ‘CC(=O)[O-].[NH3+]C’, - this function will split the molecules into a list of ‘CC(=O)[O-]' and '[NH3+]C'. By defaults, + Finds the disconnected fragments from a molecule. For example, for the molecule "CC(=O)[O-].[NH3+]C", + this function will split the molecules into a list of "CC(=O)[O-]" and "[NH3+]C". By default, this function will return a list of atom mapping, but options are available for getting mols. Args: - asMols (bool, optional): Whether the fragments will be returned as molecules instead of atom ids. + asMols (bool, optional): Whether the fragments will be returned as molecules instead of atom IDs. Defaults to ``True``. sanitize (bool, optional): Whether the fragments molecules will be sanitized before returning them. Defaults to ``True``. - frags (list, optional): If this is provided as an empty list, the result will be mol.GetNumAtoms() + frags (list, optional): If this is provided as an empty list, the result will be ``mol.GetNumAtoms()`` long on return and will contain the fragment assignment for each Atom.
- fragsMolAtomMapping (list, optional): If this is provided as an empty list, the result will be a + fragsMolAtomMapping (list, optional): If this is provided as an empty list (``[]``), the result will be a numFrags long list on return, and each entry will contain the - indices of the Atoms in that fragment: [(0, 1, 2, 3), (4, 5)].values + indices of the Atoms in that fragment: [(0, 1, 2, 3), (4, 5)]. Returns: tuple: a tuple of atom mapping or a tuple of split molecules (RDKitMol). @@ -991,13 +1019,15 @@ def GetTorsionalModes(self, torsions += find_ring_torsions(self._mol) return torsions - def GetVdwMatrix(self, threshold=0.4) -> Optional[np.ndarray]: + def GetVdwMatrix(self, + threshold: float = 0.4, + ) -> Optional[np.ndarray]: """ Get the derived Van der Waals matrix, which can be used to analyze the collision of atoms. More information can be found from ``generate_vdw_mat``. Args: - threshold: float indicating the threshold to use in the vdw matrix + threshold: A float indicating the threshold to use in the vdw matrix. Defaults to ``0.4``. Returns: Optional[np.ndarray]: A 2D array of the derived Van der Waals Matrix, if the @@ -1010,11 +1040,16 @@ def GetVdwMatrix(self, threshold=0.4) -> Optional[np.ndarray]: return self._vdw_mat def HasCollidingAtoms(self, - threshold=0.4, + threshold: float = 0.4, ) -> bool: """ + Check whether the molecule has colliding atoms. + Args: - threshold: float indicating the threshold to use in the vdw matrix + threshold: A float indicating the threshold to use in the vdw matrix. Defaults to ``0.4``. + + Returns: + bool: Whether the molecule has colliding atoms. """ dist_mat = np.triu(self.GetDistanceMatrix()) @@ -1032,7 +1067,7 @@ def HasSameConnectivity(self, Args: confId (int, optional): The conformer ID. Defaults to ``0``. - backend (str, optional): The backend to use for the comparison. Defaults to ``openbabel``. + backend (str, optional): The backend to use for the comparison. Defaults to ``'openbabel'``. 
**kwargs: The keyword arguments to pass to the backend. Returns: @@ -1067,7 +1102,8 @@ def Kekulize(self, Kekulizes the molecule. Args: - clearAromaticFlags (optional): if `True`, all atoms and bonds in the molecule will be marked non-aromatic following the kekulization. Defaults to `False`. + clearAromaticFlags (optional): If ``True``, all atoms and bonds in the molecule will be marked non-aromatic + following the kekulization. Defaults to ``False``. """ Chem.KekulizeIfPossible(self._mol, clearAromaticFlags=clearAromaticFlags) @@ -1083,14 +1119,14 @@ def PrepareOutputMol(self, if explicitly added, will be included and reduce the readablity. Defaults to ``False``. Note, following Hs are not removed: - 1. H which aren’t connected to a heavy atom. E.g.,[H][H]. + 1. H which aren't connected to a heavy atom. E.g.,[H][H]. 2. Labelled H. E.g., atoms with atomic number=1, but isotope > 1. 3. Two coordinate Hs. E.g., central H in C[H-]C. 4. Hs connected to dummy atoms 5. Hs that are part of the definition of double bond Stereochemistry. 6. Hs that are not connected to anything else. - sanitize (bool, optional): whether to sanitize the molecule. Defaults to ``True``. + sanitize (bool, optional): Whether to sanitize the molecule. Defaults to ``True``. Returns: Mol: A Mol instance used for output purpose. @@ -1105,7 +1141,7 @@ def PrepareOutputMol(self, def RemoveHs(self, sanitize: bool = True): """ - Remove H atoms. Useful when trying to match heavy atoms.py + Remove H atoms. Useful when trying to match heavy atoms. Args: sanitize (bool, optional): Whether to sanitize the molecule. Defaults to ``True``. @@ -1121,12 +1157,12 @@ def RenumberAtoms(self, Args: newOrder (list, optional): the new ordering the atoms (should be numAtoms long). E.g, - if newOrder is [3,2,0,1], then atom 3 in the original molecule - will be atom 0 in the new one. 
If no value provided, then the molecule + if newOrder is ``[3,2,0,1]``, then atom ``3`` in the original molecule + will be atom ``0`` in the new one. If no value provided, then the molecule will be renumbered based on the current atom map numbers. The latter is helpful when the sequence of atom map numbers and atom indexes are inconsistent. updateAtomMap (bool): Whether to update the atom map number based on the - new order. + new order. Defaults to ``True``. Returns: RDKitMol: Molecule with reordered atoms. @@ -1150,8 +1186,8 @@ def Sanitize(self, Args: sanitizeOps (int or str, optional): Sanitize operations to be carried out. Defaults to - SanitizeFlags.SANITIZE_ALL. More details can be found at - https://www.rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=sanitize#rdkit.Chem.rdmolops.SanitizeFlags. + ``SanitizeFlags.SANITIZE_ALL``. More details can be found at + `RDKit docs `_. """ Chem.rdmolops.SanitizeMol(self._mol, sanitizeOps) @@ -1193,7 +1229,7 @@ def Reflect(self, Reflect the atom coordinates of a molecule, and therefore its mirror image. Args: - id (int, optional): The conformer id to reflect. + id (int, optional): The conformer id to reflect. Defaults to ``0``. """ Chem.rdMolAlign.AlignMol(refMol=self._mol, prbMol=self._mol, @@ -1234,19 +1270,19 @@ def SetPositions(self, def ToOBMol(self) -> 'openbabel.OBMol': """ - Convert RDKitMol to a OBMol. + Convert ``RDKitMol`` to a ``OBMol``. Returns: - OBMol: The corresponding openbabel OBMol. + OBMol: The corresponding openbabel ``OBMol``. """ return rdkit_mol_to_openbabel_mol(self) def ToRWMol(self) -> RWMol: """ - Convert the RDKitMol Molecule back to a RDKit Chem.rdchem.RWMol. + Convert the ``RDKitMol`` Molecule back to a RDKit ``Chem.rdchem.RWMol``. returns: - RWMol: A RDKit Chem.rdchem.RWMol molecule. + RWMol: A RDKit ``Chem.rdchem.RWMol`` molecule. """ return self._mol @@ -1315,7 +1351,7 @@ def ToXYZ(self, comment: str = '', ) -> str: """ - Convert RDKitMol to a xyz string. 
+ Convert ``RDKitMol`` to a xyz string. Args: confId (int): The conformer ID to be exported. @@ -1336,7 +1372,7 @@ def ToMolBlock(self, confId: int = -1, ) -> str: """ - Convert RDKitMol to a mol block string. + Convert ``RDKitMol`` to a mol block string. Args: confId (int): The conformer ID to be exported. @@ -1350,10 +1386,13 @@ def ToAtoms(self, confId: int = 0, ) -> Atoms: """ - Convert RDKitMol to the ase.Atoms object. + Convert ``RDKitMol`` to the ``ase.Atoms`` object. Args: - confId (int): The conformer ID to be exported. + confId (int): The conformer ID to be exported. Defaults to ``0``. + + Returns: + Atoms: The corresponding ``ase.Atoms`` object. """ atoms = Atoms(positions=self.GetPositions(id=confId), numbers=self.GetAtomicNumbers()) @@ -1406,6 +1445,12 @@ def GetInternalCoordinates(self, ) -> list: """ Get internal coordinates of the molecule. + + Args: + nonredundant (bool): Whether to return nonredundant internal coordinates. Defaults to ``True``. + + Returns: + list: A list of internal coordinates. """ bonds, angles, torsions = get_internal_coords(self.ToOBMol(), nonredundant=nonredundant) @@ -1437,11 +1482,10 @@ def GetTorsionTops(self, Args: torsion (Iterable): An iterable with four elements and the 2nd and 3rd are the pivot of the torsion. - allowNonbondPivots (bool, optional): Allow non-bonding pivots + allowNonbondPivots (bool, optional): Allow non-bonding pivots. Defaults to ``False``. Returns: - - one of the top of the torsion - - the other top of the torsion + tuple: Two frags, one of the top of the torsion, and the other top of the torsion. """ pivot = [int(i) for i in torsion[1:3]] try: @@ -1762,7 +1806,7 @@ def SetVdwMatrix(self, vdw_radii: dict = VDW_RADII): """ Set the derived Van der Waals matrix, which is an upper triangle matrix - calculated from a threshold usually around 0.4 of the Van der Waals Radii. + calculated from a threshold usually around ``0.4`` of the Van der Waals Radii. Its diagonal elements are all zeros. 
The element (i, j) is calculated by threshold * sum( R(atom i) + R(atom j) ). If two atoms are bonded, the value is set to be zero. When threshold = 0.4, the value is close to the covalent bond @@ -1772,7 +1816,7 @@ def SetVdwMatrix(self, threshold (float): The threshold used to calculate the derived Van der Waals matrix. A larger value results in a matrix with larger values; When compared with distance matrix, it may overestiate the - overlapping between atoms. The default value is 0.4. + overlapping between atoms. The default value is ``0.4``. vdw_radii (dict): A dict stores the Van der Waals radii of different elements. Raises: @@ -1791,10 +1835,12 @@ def parse_xyz_or_smiles_list(mol_list, Args: mol_list (list): a list of smiles or xyzs or tuples of (string, multiplicity) to specify desired multiplicity. - E.g., ['CCC', 'H 0 0 0', ('[CH2]', 1)] + E.g., ``['CCC', 'H 0 0 0', ('[CH2]', 1)]`` with_3d_info (bool): Whether to indicate which entries are from 3D representations. - Defaults to False. + Defaults to ``False``. + Returns: + list: A list of RDKitMol objects. """ mols, is_3D = [], [] for mol in mol_list: @@ -1835,7 +1881,7 @@ def generate_vdw_mat(rd_mol, threshold (float): The threshold used to calculate the derived Van der Waals matrix. A larger value results in a matrix with larger values; When compared with distance matrix, it may overestiate the - overlapping between atoms. The default value is 0.4. + overlapping between atoms. The default value is ``0.4``. vdw_radii (dict): A dict stores the Van der Waals radii of different elements. Raises: @@ -1877,10 +1923,10 @@ def generate_radical_resonance_structures(mol: RDKitMol, Args: mol (RDKitMol): A radical molecule. - unique (bool, optional): Filter out duplicate resonance structures from the list. Defaults to True. + unique (bool, optional): Filter out duplicate resonance structures from the list. Defaults to ``True``. consider_atommap (bool, atommap): If consider atom map numbers in filtration duplicates. 
- Only effective when uniquify=True. Defaults to False. - kekulize (bool, optional): Whether to kekulize the molecule. Defaults to False. When True, uniquifying + Only effective when uniquify=True. Defaults to ``False``. + kekulize (bool, optional): Whether to kekulize the molecule. Defaults to ``False``. When ``True``, uniquifying process will be skipped. Returns: @@ -1972,7 +2018,7 @@ def has_matched_mol(mol: RDKitMol, mols (List[RDKitMol]): The list of molecules to be processed. consider_atommap (bool, optional): If treat chemically equivalent molecules with different atommap numbers as different molecules. - Defaults to False. + Defaults to ``False``. Returns: bool: if a matched molecules if found. @@ -1998,9 +2044,9 @@ def get_unique_mols(mols: List[RDKitMol], mols (list): The molecules to be processed. consider_atommap (bool, optional): If treat chemically equivalent molecules with different atommap numbers as different molecules. - Defaults to False. + Defaults to ``False``. same_formula (bool, opional): If the mols has the same formula you may set it to True - to save computational time. Defaults to False. + to save computational time. Defaults to ``False``. Returns: list: A list of unique molecules. 
From 633ba2eaa89ea8efc5b06b18240f36e9fb7187c9 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 12:48:11 -0400 Subject: [PATCH 15/21] Update installation guide --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 16debffc..e889cb9c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -27,7 +27,7 @@ You can also install RDMC from the source code: cd RDMC conda env create -f environment.yml conda activate rdmc - python setup.py install + python -m pip install --no-deps -vv ./ To start with, simply try: From 14d03583909eb8caa474c1573effe77c46018f71 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 12:51:10 -0400 Subject: [PATCH 16/21] Add the docs for conformer_generation module Initiate with the align submodule with corrected docstring. --- .../reference/conformer_generation/align.rst | 7 + .../reference/conformer_generation/index.rst | 13 ++ docs/source/reference/rdmc.rst | 1 + rdmc/conformer_generation/align.py | 120 +++++++++++------- 4 files changed, 92 insertions(+), 49 deletions(-) create mode 100644 docs/source/reference/conformer_generation/align.rst create mode 100644 docs/source/reference/conformer_generation/index.rst diff --git a/docs/source/reference/conformer_generation/align.rst b/docs/source/reference/conformer_generation/align.rst new file mode 100644 index 00000000..c3f8ee5d --- /dev/null +++ b/docs/source/reference/conformer_generation/align.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.align +================================= + +.. 
automodule:: rdmc.conformer_generation.align + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst new file mode 100644 index 00000000..8a376c67 --- /dev/null +++ b/docs/source/reference/conformer_generation/index.rst @@ -0,0 +1,13 @@ +rdmc.conformer_generation +========================== + +.. automodule:: rdmc.conformer_generation + :members: + :undoc-members: + :show-inheritance: + + +.. toctree:: + :maxdepth: 2 + + align diff --git a/docs/source/reference/rdmc.rst b/docs/source/reference/rdmc.rst index 668521b5..0d6152c7 100644 --- a/docs/source/reference/rdmc.rst +++ b/docs/source/reference/rdmc.rst @@ -12,6 +12,7 @@ rdmc external/index mathlib/index + conformer_generation/index .. toctree:: diff --git a/rdmc/conformer_generation/align.py b/rdmc/conformer_generation/align.py index 8e6a91c7..9ffa5c1c 100644 --- a/rdmc/conformer_generation/align.py +++ b/rdmc/conformer_generation/align.py @@ -20,19 +20,23 @@ class NaiveAlign(object): """ This is a naive alignment algorithm aligns reactant conformers. - For 1 reactant system, the algorithm simply put the center of the reactant at the origin. - For 2 reactant system, the resulting alignment has the following charateristics: - - the centroid for fragment 1 is at the origin. - - the centroid for fragment 2 is at (R1 + R2 + D), where R1 is the radius of fragment 1, R2 is the radius - of fragment 2, and D is a pre-set distance value defined by the user. - - the centroid of the reacting atoms in fragment 1 should be around the line (0,0,0) => (1,0,0). - - the centroid of the reacting atoms in fragment 2 should be around the line (0,0,0) => (1,0,0). - - the distance between atoms to form bonds are minimized. - # TODO: [long term goal] - For 3 reactant system, the feature is under-development. 
There should be two cases: close-linear alignment and triangle alignment, - depending on the max number of interactions among fragments. - For 4 reactant system, the feature is under-development. There should be three cases: close-linear, square, and tetrahedron alignment, - depending on the max number of interactions among fragments. + + - For 1 reactant system, the algorithm simply puts the center of the reactant at the origin. + - For 2 reactant system, the resulting alignment has the following characteristics: + + - the centroid for fragment 1 is at the origin. + - the centroid for fragment 2 is at (R1 + R2 + D), where R1 is the radius of fragment 1, R2 is the radius + of fragment 2, and D is a pre-set distance value defined by the user. + - the centroid of the reacting atoms in fragment 1 should be around the line (0,0,0) => (1,0,0). + - the centroid of the reacting atoms in fragment 2 should be around the line (0,0,0) => (1,0,0). + - the distances between atoms to form bonds are minimized. + + The following are under development: + + - For 3 reactant system, the feature is under-development. There should be two cases: close-linear alignment and triangle alignment, + depending on the max number of interactions among fragments. + - For 4 reactant system, the feature is under-development. There should be three cases: close-linear, square, and tetrahedron alignment, + depending on the max number of interactions among fragments. """ dist = 2.0 @@ -48,12 +52,12 @@ def __init__(self, Args: coords (np.array): The coordinates of the reactant complex. - atom_maps (List[List]): The atom map in the complex. E.g., ([1,2,5], [3,4]) indicates the 1st, + atom_maps (List[List]): The atom map in the complex. E.g., ``([1,2,5], [3,4])`` indicates the 1st, 2nd, and 5th atoms are in the first molecule and 3th and 4th atoms are in the second molecule. - formed_bonds (List[tuple]): The bonds that are formed in the reaction.
E.g., [(1,2)] indicates + formed_bonds (List[tuple]): The bonds that are formed in the reaction. E.g., ``[(1,2)]`` indicates atoms 1 and 2 will form a bond in the reaction. - broken_bonds (List[tuple]): The bonds that are broken in the reaction. E.g., [(1,2)] indicates + broken_bonds (List[tuple]): The bonds that are broken in the reaction. E.g., ``[(1,2)]`` indicates the bond between atoms 1 and 2 will be broken in the reaction. """ self.coords = coords @@ -116,8 +120,8 @@ def from_reactants(cls, mols (RDKitMol): A list of reactants. formed_bonds (List[tuple]): bonds formed in the reaction. broken_bonds (List[tuple]): bonds broken in the reaction. - conf_id1 (int, optional): The conformer id to be used in `mol1`. Defaults to 0. - conf_id2 (int, optional): The conformer id to be used in `mol2`. Defaults to 0. + conf_id1 (int, optional): The conformer id to be used in `mol1`. Defaults to ``0``. + conf_id2 (int, optional): The conformer id to be used in `mol2`. Defaults to ``0``. """ if conf_ids is None: conf_ids == [0] * len(mols) @@ -145,7 +149,7 @@ def from_complex(cls, r_complex (RDKitMol): The reactant complex. formed_bonds (List[tuple]): bonds formed in the reaction. broken_bonds (List[tuple]): bonds broken in the reaction. - conf_id (int, optional): The conformer id to be used in the `complex`. Defaults to 0. + conf_id (int, optional): The conformer id to be used in the ``r_complex``. Defaults to ``0``. """ coords = r_complex.GetPositions(id=conf_id) atom_maps = [list(atom_map) for atom_map in r_complex.GetMolFrags()] @@ -165,7 +169,7 @@ def from_r_and_p_complex(cls, Args: r_complex (RDKitMol): The reactant complex. p_complex (RDKitMol): The product complex. - conf_id (int, optional): The conformer id to be used in the reactant complex `r_complex`. Defaults to 0. + conf_id (int, optional): The conformer id to be used in the reactant complex ``r_complex``. Defaults to ``0``. 
""" coords = r_complex.GetPositions(id=conf_id) atom_maps = [list(atom_map) for atom_map in r_complex.GetMolFrags()] @@ -182,8 +186,8 @@ def rotate_fragment_separately(self, Args: angles (np.array): Rotation angles for molecule fragment 1. It should be an array with a - size of (1,3) indicate the rotation angles about the x, y, and z axes, respectively. - about_reacting (bool, optional): If rotate about the reactor center instead of the centroid. Defaults to False. + size of ``(1,3)`` indicate the rotation angles about the x, y, and z axes, respectively. + about_reacting (bool, optional): If rotate about the reactor center instead of the centroid. Defaults to ``False``. Returns: np.array: The coordinates after the rotation operation. @@ -207,7 +211,8 @@ def initialize_align(self, Initialize the alignment for the reactants. Currently only available for 1 reactant and 2 reactant systems. Args: - dist (float, optional): The a preset distance used to separate molecules. Defaults to None meaning using the value of `self.dist`. + dist (float, optional): The a preset distance used to separate molecules. Defaults to ``None``, + meaning using the default value of :obj:`dist`. """ if dist is not None and dist > 0: self.dist = dist @@ -230,7 +235,7 @@ def score_bimolecule(self, angles: np.array, ) -> float: """ - Calculate the score of bimolecule alignment. + Calculate the score of bimolecular reaction alignment. Args: angles (np.array): an array with 6 elements. The first 3 angles correspond to the rotation of the first fragment, @@ -281,12 +286,17 @@ def score_bimolecule(self, return score1 + score2 + score3 def get_alignment_coords(self, - dist: float = None,): + dist: float = None, + ) -> np.array: """ Get coordinates of the alignment. Args: - dist (float, optional): The a preset distance used to separate molecules. Defaults to None meaning using the value of `self.dist`. + dist (float, optional): The a preset distance used to separate molecules. 
+ Defaults to ``None``, meaning using the value of :obj:`dist`. + + Returns: + np.array: The coordinates of the alignment. """ self.initialize_align(dist=dist,) @@ -304,26 +314,32 @@ def get_alignment_coords(self, def __call__(self, dist: float = None,): """ - Get coordinates of the alignment. Same as `self.get_alignment` + Get coordinates of the alignment. Equivalent to calling :obj:`get_alignment_coords`. Args: - dist (float, optional): The a preset distance used to separate molecules. Defaults to None meaning using the value of `self.dist`. + dist (float, optional): The preset distance used to separate molecules. + Defaults to ``None`` meaning using the value of :obj:`dist`. + + Returns: + np.array: The coordinates of the alignment. """ return self.get_alignment_coords(dist=dist) -def reset_pmol(r_mol, p_mol): +def reset_pmol(r_mol: 'RDKitMol', + p_mol: 'RDKitMol', + ) -> 'RDKitMol': """ Reset the product mol to best align with the reactant. This procedure consists of initializing the product 3D structure with the reactant coordinates and then 1) minimizing the product structure with constraints for broken - bonds and 2) performing a second minimization with no constraints + bonds and 2) performing a second minimization with no constraints. Args: - r_mol ('RDKitMol' or 'Mol'): a RDKit Mol object - p_mol ('RDKitMol' or 'Mol'): a RDKit Mol object + r_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for reactant. + p_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for product. - Returns - new_p_mol: The new product mol with changed coordinates + Returns: + RDKitMol: The new product mol with changed coordinates.
""" # copy current pmol and set new positions p_mol_new = p_mol.Copy(quickCopy=True) @@ -351,19 +367,23 @@ def reset_pmol(r_mol, p_mol): return obff.get_optimized_mol() -def prepare_mols(r_mol, p_mol, align_bimolecular=True): +def prepare_mols(r_mol: 'RDKitMol', + p_mol: 'RDKitMol', + align_bimolecular: bool = True, + ) -> Tuple['RDKitMol', 'RDKitMol']: """ Prepare mols for reaction path analysis. If reactant has multiple fragments, first orient reactants in reacting - orientation. Then, reinitialize coordinates of product using reset_pmol function + orientation. Then, reinitialize coordinates of product using :obj:`reset_pmol` function. Args: - r_mol ('RDKitMol' or 'Mol'): a RDKit Mol object - p_mol ('RDKitMol' or 'Mol'): a RDKit Mol object - align_bimolecular (bool, optional): Whether or not to use alignment algorithm on bimolecular reactions - (defaults to True) - - Returns - r_mol, new_p_mol: The new reactant and product mols + r_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for reactant. + p_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for product. + align_bimolecular (bool, optional): Whether or not to use alignment algorithm on bimolecular reactions. + Defaults to ``True`` + + Returns: + r_mol ('RDKitMol'): The new reactant molecule. + p_mol_new ('RDKitMol'): The new product molecule. """ if len(r_mol.GetMolFrags()) == 2: if align_bimolecular: @@ -372,16 +392,18 @@ def prepare_mols(r_mol, p_mol, align_bimolecular=True): return r_mol, p_mol_new -def align_reactant_fragments(r_mol, p_mol): +def align_reactant_fragments(r_mol: 'RDKitMol', + p_mol: 'RDKitMol', + ) -> 'RDKitMol': """ - Given reactant and product mols, find details of formed and broken bonds and generate reacting reactant complex + Given reactant and product mols, find details of formed and broken bonds and generate reacting reactant complex. 
Args: - r_mol ('RDKitMol' or 'Mol'): a RDKit Mol object - p_mol ('RDKitMol' or 'Mol'): a RDKit Mol object + r_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for reactant. + p_mol ('RDKitMol' or 'Mol'): An RDKit Mol object for product. - Returns - r_mol_naive_align: The new reactant with aligned fragments + Returns: + RDKitMol: The new reactant with aligned fragments. """ formed_bonds, broken_bonds = get_formed_and_broken_bonds(r_mol, p_mol) if len(formed_bonds + broken_bonds) == 0: From 7fd2f8c02bdad38ca898c127bdff69d97b9cfa92 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 13:24:35 -0400 Subject: [PATCH 17/21] Create docs of embedders and ts_guessers 1. Create a section of embedding geometries 2. Update the docstrings of embedders and ts_guessers --- .../{ => embedding_geometries}/align.rst | 0 .../embedding_geometries/embedders.rst | 7 ++ .../embedding_geometries/index.rst | 9 ++ .../embedding_geometries/ts_guessers.rst | 7 ++ .../reference/conformer_generation/index.rst | 4 +- rdmc/conformer_generation/embedders.py | 111 ++++++++++++++++-- rdmc/conformer_generation/ts_guessers.py | 86 +++++++++----- 7 files changed, 186 insertions(+), 38 deletions(-) rename docs/source/reference/conformer_generation/{ => embedding_geometries}/align.rst (100%) create mode 100644 docs/source/reference/conformer_generation/embedding_geometries/embedders.rst create mode 100644 docs/source/reference/conformer_generation/embedding_geometries/index.rst create mode 100644 docs/source/reference/conformer_generation/embedding_geometries/ts_guessers.rst diff --git a/docs/source/reference/conformer_generation/align.rst b/docs/source/reference/conformer_generation/embedding_geometries/align.rst similarity index 100% rename from docs/source/reference/conformer_generation/align.rst rename to docs/source/reference/conformer_generation/embedding_geometries/align.rst diff --git a/docs/source/reference/conformer_generation/embedding_geometries/embedders.rst 
b/docs/source/reference/conformer_generation/embedding_geometries/embedders.rst new file mode 100644 index 00000000..a9864870 --- /dev/null +++ b/docs/source/reference/conformer_generation/embedding_geometries/embedders.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.embedders +=================================== + +.. automodule:: rdmc.conformer_generation.embedders + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/embedding_geometries/index.rst b/docs/source/reference/conformer_generation/embedding_geometries/index.rst new file mode 100644 index 00000000..67ed1854 --- /dev/null +++ b/docs/source/reference/conformer_generation/embedding_geometries/index.rst @@ -0,0 +1,9 @@ +Embedding geometries +===================== + +.. toctree:: + :maxdepth: 2 + + embedders + align + ts_guessers diff --git a/docs/source/reference/conformer_generation/embedding_geometries/ts_guessers.rst b/docs/source/reference/conformer_generation/embedding_geometries/ts_guessers.rst new file mode 100644 index 00000000..9ecbc2cd --- /dev/null +++ b/docs/source/reference/conformer_generation/embedding_geometries/ts_guessers.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.ts_guessers +====================================== + +.. automodule:: rdmc.conformer_generation.ts_guessers + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index 8a376c67..b85f5084 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -6,8 +6,8 @@ rdmc.conformer_generation :undoc-members: :show-inheritance: - .. 
toctree:: :maxdepth: 2 - align + embedding_geometries/index + diff --git a/rdmc/conformer_generation/embedders.py b/rdmc/conformer_generation/embedders.py index f8734378..c7fd6828 100644 --- a/rdmc/conformer_generation/embedders.py +++ b/rdmc/conformer_generation/embedders.py @@ -26,6 +26,9 @@ class ConfGenEmbedder: + """ + Base class for conformer generation embedders. + """ def __init__(self, track_stats=False): self.iter = 0 @@ -35,7 +38,13 @@ def __init__(self, track_stats=False): self.stats = [] self.smiles = None - def update_mol(self, smiles): + def update_mol(self, smiles: str): + """ + Update the molecule graph based on the SMILES string. + + Args: + smiles (str): SMILES string of the molecule + """ # Only update the molecule if smiles is changed # Only copy the molecule graph from the previous run rather than conformers if smiles != self.smiles: @@ -45,12 +54,33 @@ def update_mol(self, smiles): # Copy the graph but remove conformers self.mol = self.mol.Copy(quickCopy=True) - def embed_conformers(self, n_conformers): + def embed_conformers(self, + n_conformers: int): + """ + Embed conformers according to the molecule graph. + + Args: + n_conformers (int): Number of conformers to generate. + + Raises: + NotImplementedError: This method needs to be implemented in the subclass. + """ raise NotImplementedError def update_stats(self, - n_trials, - time=0.): + n_trials: int, + time: float = 0. + ) -> dict: + """ + Update the statistics of the conformer generation. + + Args: + n_trials (int): Number of trials + time (float, optional): Time spent on conformer generation. Defaults to ``0.``. + + Returns: + dict: Statistics of the conformer generation + """ n_success = self.mol.GetNumConformers() self.n_success = n_success self.percent_success = n_success / n_trials * 100 @@ -62,10 +92,27 @@ def update_stats(self, return stats def write_mol_data(self): + """ + Write the molecule data. + + Returns: + dict: Molecule data. 
+ """ return mol_to_dict(self.mol, copy=False, iter=self.iter) - def __call__(self, smiles, n_conformers): + def __call__(self, + smiles: str, + n_conformers: int): + """ + Embed conformers according to the molecule graph. + + Args: + smiles (str): SMILES string of the molecule. + n_conformers (int): Number of conformers to generate. + Returns: + dict: Molecule data. + """ self.iter += 1 time_start = time() self.update_mol(smiles) @@ -82,7 +129,20 @@ def __call__(self, smiles, n_conformers): class GeoMolEmbedder(ConfGenEmbedder): - def __init__(self, trained_model_dir, dataset="drugs", temp_schedule="linear", track_stats=False): + """ + Embed conformers using GeoMol. + + Args: + trained_model_dir (str): Directory of the trained model. + dataset (str, optional): Dataset used for training. Defaults to ``"drugs"``. + temp_schedule (str, optional): Temperature schedule. Defaults to ``"linear"``. + track_stats (bool, optional): Whether to track the statistics of the conformer generation. Defaults to ``False``. + """ + def __init__(self, + trained_model_dir: str, + dataset: str = "drugs", + temp_schedule: str = "linear", + track_stats: bool = False): super(GeoMolEmbedder, self).__init__(track_stats) # TODO: add option of pre-pruning geometries using alpha values @@ -101,8 +161,17 @@ def __init__(self, trained_model_dir, dataset="drugs", temp_schedule="linear", t self.temp_schedule = temp_schedule self.dataset = dataset - def embed_conformers(self, n_conformers): + def embed_conformers(self, + n_conformers: int): + """ + Embed conformers according to the molecule graph. + Args: + n_conformers (int): Number of conformers to generate. + + Returns: + mol: Molecule with conformers. + """ # set "temperature" if self.temp_schedule == "none": self.model.random_vec_std = self.std @@ -127,13 +196,37 @@ def embed_conformers(self, n_conformers): return self.mol class ETKDGEmbedder(ConfGenEmbedder): + """ + Embed conformers using ETKDG. 
+ """ + + def embed_conformers(self, n_conformers: int): + """ + Embed conformers according to the molecule graph. + + Args: + n_conformers (int): Number of conformers to generate. - def embed_conformers(self, n_conformers): + Returns: + mol: Molecule with conformers. + """ self.mol.EmbedMultipleConfs(n_conformers) return self.mol class RandomEmbedder(ConfGenEmbedder): + """ + Embed conformers with coordinates of random numbers. + """ + + def embed_conformers(self, n_conformers: int): + """ + Embed conformers according to the molecule graph. + + Args: + n_conformers (int): Number of conformers to generate. - def embed_conformers(self, n_conformers): + Returns: + mol: Molecule with conformers. + """ self.mol.EmbedMultipleNullConfs(n_conformers, random=True) return self.mol diff --git a/rdmc/conformer_generation/ts_guessers.py b/rdmc/conformer_generation/ts_guessers.py index 4c5c95cc..55ce333b 100644 --- a/rdmc/conformer_generation/ts_guessers.py +++ b/rdmc/conformer_generation/ts_guessers.py @@ -2,7 +2,7 @@ #-*- coding: utf-8 -*- """ -Modules for providing transition state initial guess geometries +Modules for providing transition state initial guess geometries. """ # RDKit import first to avoid some import or runtime issues # TODO: Details to be added. @@ -78,6 +78,9 @@ def __init__(self, config): class TSInitialGuesser: """ The abstract class for TS initial Guesser. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail_ = True @@ -88,7 +91,7 @@ def __init__(self, Initialize the TS initial guesser. Args: - track_stats (bool, optional): Whether to track the status. Defaults to False. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ assert self._avail, f"The dependency requirement needs to be fulfilled to use {self.__class__.__name__}. Please install the relevant dependencies and try again.." 
self.track_stats = track_stats @@ -104,6 +107,16 @@ def generate_ts_guesses(self, The key function used to generate TS guesses. It varies by the actual classes and need to implemented inside each class. The function should at least take mols and save_dir as input arguments. The returned value should be a RDKitMol with TS geometries. + + Args: + mols (list): A list of reactant and product pairs. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None`` for not saving. + + Returns: + RDKitMol: The TS molecule in ``RDKitMol`` with 3D conformer saved with the molecule. + + Raises: + NotImplementedError: This method needs to be implemented in the subclass. """ raise NotImplementedError @@ -112,7 +125,7 @@ def save_guesses(self, rp_combos: list, ts_mol: 'RDKitMol'): """ - Save the generated guesses into the given `save_dir`. + Save the generated guesses into the given ``save_dir``. Args: save_dir (str): The path to the directory to save the results. @@ -168,12 +181,12 @@ def __call__(self, The workflow to generate TS initial guesses. Args: - mols (list): A list of molecules - multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to None. - save_dir (str, optional): The path to save results. Defaults to None. + mols (list): A list of molecules. + multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to ``None`` for not setting. + save_dir (str, optional): The path to save results. Defaults to ``None`` for not saving. Returns: - 'RDKitMol' + RDKitMol: The TS molecule in RDKitMol with 3D conformer saved with the molecule. """ time_start = time() ts_mol_data = self.generate_ts_guesses(mols, multiplicity, save_dir) @@ -189,6 +202,10 @@ def __call__(self, class TSEGNNGuesser(TSInitialGuesser): """ The class for generating TS guesses using the TS-EGNN model. + + Args: + trained_model_dir (str): The path to the directory storing the trained TS-EGNN model. 
+ track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail = _ts_egnn_avail @@ -200,7 +217,7 @@ def __init__(self, Args: trained_model_dir (str): The path to the directory storing the trained TS-EGNN model. - track_stats (bool, optional): Whether to track the status. Defaults to False. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(TSEGNNGuesser, self).__init__(track_stats) @@ -225,11 +242,11 @@ def generate_ts_guesses(self, Args: mols (list): A list of reactant and product pairs. - multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to None. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to ``None``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The TS molecule in ``RDKitMol`` with 3D conformer saved with the molecule. """ # Generate the input for the TS-EGNN model rp_inputs = [(x[0].ToRWMol(), None, x[1].ToRWMol()) for x in mols] # reactant, None (for TS), product @@ -255,6 +272,10 @@ def generate_ts_guesses(self, class TSGCNGuesser(TSInitialGuesser): """ The class for generating TS guesses using the TS-GCN model. + + Args: + trained_model_dir (str): The path to the directory storing the trained TS-GCN model. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail = _ts_gcn_avail @@ -266,7 +287,7 @@ def __init__(self, Args: trained_model_dir (str): The path to the directory storing the trained TS-GCN model. - track_stats (bool, optional): Whether to track the status. Defaults to False. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(TSGCNGuesser, self).__init__(track_stats) @@ -292,11 +313,11 @@ def generate_ts_guesses(self, Args: mols (list): A list of reactant and product pairs. 
- multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to None. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to ``None``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The TS molecule in RDKitMol with 3D conformer saved with the molecule. """ # Prepare the input for the TS-GCN model rp_inputs = [(x[0].ToRWMol(), None, x[1].ToRWMol()) for x in mols] @@ -323,6 +344,9 @@ def generate_ts_guesses(self, class RMSDPPGuesser(TSInitialGuesser): """ The class for generating TS guesses using the RMSD-PP method. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail = True @@ -349,7 +373,7 @@ def generate_ts_guesses(self, save_dir (Optional[str], optional): The path to save the results. Defaults to None. Returns: - RDKitMol + RDKitMol: The TS molecule in RDKitMol with 3D conformer saved with the molecule. """ ts_guesses, used_rp_combos = [], [] multiplicity = multiplicity or 1 @@ -377,6 +401,10 @@ def generate_ts_guesses(self, class AutoNEBGuesser(TSInitialGuesser): """ The class for generating TS guesses using the AutoNEB method. + + Args: + optimizer (ase.calculator.calculator.Calculator): ASE calculator. Defaults to the XTB implementation ``xtb.ase.calculator.XTB``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail = _ase_avail @@ -387,8 +415,8 @@ def __init__(self, Initialize the AutoNEB TS initial guesser. Args: - optimizer (ase.calculator.calculator.Calculator): ASE calculator. Defaults to the XTB implementation `xtb.ase.calculator.XTB`. - track_stats (bool, optional): Whether to track the status. Defaults to False. + optimizer (ase.calculator.calculator.Calculator): ASE calculator. Defaults to the XTB implementation ``xtb.ase.calculator.XTB``. 
+ track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(AutoNEBGuesser, self).__init__(track_stats) self.optimizer = optimizer @@ -411,7 +439,8 @@ def optimizer(self): return self._optimizer @optimizer.setter - def optimizer(self, optimizer: 'Calculator'): + def optimizer(self, + optimizer: 'Calculator'): try: assert isinstance(optimizer, Calculator), f"Invalid optimizer used ('{optimizer}'). Please use ASE calculators." except NameError: @@ -427,11 +456,11 @@ def generate_ts_guesses(self, Args: mols (list): A list of reactant and product pairs. - multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to None. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to ``None``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The TS molecule in RDKitMol with 3D conformer saved with the molecule. """ ts_guesses, used_rp_combos = [], [] @@ -502,7 +531,10 @@ def generate_ts_guesses(self, class DEGSMGuesser(TSInitialGuesser): """ - The class for generatign TS guesses using the DE-GSM method. + The class for generating TS guesses using the DE-GSM method. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ _avail = True @@ -516,7 +548,7 @@ def __init__(self, Initialize the DE-GSM TS initial guesser. Args: - track_stats (bool, optional): Whether to track the status. Defaults to False. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(DEGSMGuesser, self).__init__(track_stats) self.gsm_args = gsm_args @@ -538,11 +570,11 @@ def generate_ts_guesses(self, Args: mols (list): A list of reactant and product pairs. - multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to None. 
- save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the reaction. Defaults to ``None``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The TS molecule in RDKitMol with 3D conformer saved with the molecule. """ # #TODO: May add a support for scratch directory # currently use the save directory as the working directory From 62afafeca37e95853d8322d2f0355ca143aadeed Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 13:50:17 -0400 Subject: [PATCH 18/21] Add docs for non-TS and TS optimizers Add a section called "Geometry Optimization", and clean up the docstrings in the optimizers.py and ts_optimizers.py --- .../embedding_geometries/index.rst | 2 +- .../geometry_optimization/index.rst | 8 ++ .../geometry_optimization/optimizers.rst | 7 ++ .../geometry_optimization/ts_optimizers.rst | 7 ++ .../reference/conformer_generation/index.rst | 1 + rdmc/conformer_generation/optimizers.py | 117 +++++++++++++++--- rdmc/conformer_generation/ts_optimizers.py | 117 ++++++++++++------ 7 files changed, 204 insertions(+), 55 deletions(-) create mode 100644 docs/source/reference/conformer_generation/geometry_optimization/index.rst create mode 100644 docs/source/reference/conformer_generation/geometry_optimization/optimizers.rst create mode 100644 docs/source/reference/conformer_generation/geometry_optimization/ts_optimizers.rst diff --git a/docs/source/reference/conformer_generation/embedding_geometries/index.rst b/docs/source/reference/conformer_generation/embedding_geometries/index.rst index 67ed1854..476a701b 100644 --- a/docs/source/reference/conformer_generation/embedding_geometries/index.rst +++ b/docs/source/reference/conformer_generation/embedding_geometries/index.rst @@ -1,4 +1,4 @@ -Embedding geometries +Embedding Geometries ===================== .. 
toctree:: diff --git a/docs/source/reference/conformer_generation/geometry_optimization/index.rst b/docs/source/reference/conformer_generation/geometry_optimization/index.rst new file mode 100644 index 00000000..8df037a5 --- /dev/null +++ b/docs/source/reference/conformer_generation/geometry_optimization/index.rst @@ -0,0 +1,8 @@ +Geometry Optimization +===================== + +.. toctree:: + :maxdepth: 2 + + optimizers + ts_optimizers diff --git a/docs/source/reference/conformer_generation/geometry_optimization/optimizers.rst b/docs/source/reference/conformer_generation/geometry_optimization/optimizers.rst new file mode 100644 index 00000000..35354af0 --- /dev/null +++ b/docs/source/reference/conformer_generation/geometry_optimization/optimizers.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.optimizers +===================================== + +.. automodule:: rdmc.conformer_generation.optimizers + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/geometry_optimization/ts_optimizers.rst b/docs/source/reference/conformer_generation/geometry_optimization/ts_optimizers.rst new file mode 100644 index 00000000..7b69d6be --- /dev/null +++ b/docs/source/reference/conformer_generation/geometry_optimization/ts_optimizers.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.ts_optimizers +======================================= + +.. 
automodule:: rdmc.conformer_generation.ts_optimizers + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index b85f5084..dd034f03 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -10,4 +10,5 @@ rdmc.conformer_generation :maxdepth: 2 embedding_geometries/index + geometry_optimization/index diff --git a/rdmc/conformer_generation/optimizers.py b/rdmc/conformer_generation/optimizers.py index d6696380..c72d963d 100644 --- a/rdmc/conformer_generation/optimizers.py +++ b/rdmc/conformer_generation/optimizers.py @@ -2,7 +2,7 @@ #-*- coding: utf-8 -*- """ -Modules for optimizing initial guess geometries +Modules for optimizing initial guess geometries. """ from rdmc.forcefield import RDKitFF @@ -23,6 +23,12 @@ class ConfGenOptimizer: + """ + Base class for the geometry optimizers used in conformer generation. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. + """ def __init__(self, track_stats=False): self.iter = 0 @@ -32,10 +38,31 @@ def __init__(self, track_stats=False): self.n_opt_cycles = None self.stats = [] - def optimize_conformers(self, mol_data): + def optimize_conformers(self, + mol_data: List[dict]): + """ + Optimize the conformers. + + Args: + mol_data (List[dict]): The list of conformers to be optimized. + + Raises: + NotImplementedError: This function should be implemented in the child class. + """ raise NotImplementedError - def __call__(self, mol_data): + def __call__(self, + mol_data: List[dict], + ) -> List[dict]: + """ + Run the workflow to optimize the conformers. + + Args: + mol_data (List[dict]): The list of conformers to be optimized. + + Returns: + List[dict]: The list of optimized conformers. 
+ """ self.iter += 1 time_start = time() @@ -55,15 +82,34 @@ def __call__(self, mol_data): class MMFFOptimizer(ConfGenOptimizer): - def __init__(self, method="rdkit", track_stats=False): + """ + Optimizer using the MMFF force field. + + Args: + method (str, optional): The method to be used for stable species optimization. Defaults to ``"rdkit"``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. + """ + def __init__(self, + method: str = "rdkit", + track_stats: bool = False): super(MMFFOptimizer, self).__init__(track_stats) if method == "rdkit": self.ff = RDKitFF() elif method == "openbabel": raise NotImplementedError - def optimize_conformers(self, mol_data): + def optimize_conformers(self, + mol_data: List[dict], + ) -> List[dict]: + """ + Optimize the conformers. + Args: + mol_data (List[dict]): The list of conformers to be optimized. + + Returns: + List[dict]: The list of optimized conformers sorted by energy. + """ if len(mol_data) == 0: return mol_data @@ -90,13 +136,34 @@ def optimize_conformers(self, mol_data): class XTBOptimizer(ConfGenOptimizer): - def __init__(self, method="gff", level="normal", track_stats=False): + """ + Optimizer using the xTB. + + Args: + method (str, optional): The method to be used for species optimization. Defaults to ``"gff"``. + level (str, optional): The level of theory. Defaults to ``"normal"``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. + """ + def __init__(self, + method: str = "gff", + level: str = "normal", + track_stats: bool = False): super(XTBOptimizer, self).__init__(track_stats) self.method = method self.level = level - def optimize_conformers(self, mol_data): + def optimize_conformers(self, + mol_data: List[dict], + ) -> List[dict]: + """ + Optimize the conformers. + + Args: + mol_data (List[dict]): The list of conformers to be optimized. + Returns: + List[dict]: The list of optimized conformers sorted by energy. 
+ """ if len(mol_data) == 0: return mol_data @@ -137,20 +204,30 @@ def optimize_conformers(self, mol_data): return sorted(final_mol_data, key=lambda x: x["energy"]) class GaussianOptimizer(ConfGenOptimizer): + """ + Optimizer using the Gaussian. + + Args: + method (str, optional): The method to be used for species optimization. You can use the level of theory available in Gaussian. + Defaults to ``"GFN2-xTB"``, which is realized by additional scripts provided in the ``rdmc`` package. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. + """ def __init__(self, method: str = "GFN2-xTB", nprocs: int = 1, memory: int = 1, track_stats: bool = False): """ - Initiate the Gaussian berny optimizer. + Initiate the Gaussian optimizer. Args: method (str, optional): The method to be used for stable species optimization. you can use the level of theory available in Gaussian. We provided a script to run XTB using Gaussian, but there are some extra steps to do. Defaults to GFN2-xTB. - nprocs (int, optional): The number of processors to use. Defaults to 1. - memory (int, optional): Memory in GB used by Gaussian. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(GaussianOptimizer, self).__init__(track_stats) self.method = method @@ -170,17 +247,18 @@ def optimize_conformers(self, mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> 'RDKitMol': """ Optimize the conformers. Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. 
- multiplicity (int): The multiplicity of the molecule. Defaults to 1. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int): The multiplicity of the molecule. Defaults to ``1``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized molecule as RDKitMol with 3D geometries embedded. """ opt_mol = mol.Copy(quickCopy=True, copy_attrs=["KeepIDs"]) @@ -258,11 +336,12 @@ def save_opt_mols(self, ): """ Save the information of the optimized stable species into the directory. + Args: save_dir (str): The path to the directory to save the results. opt_mol (RDKitMol): The optimized stable species in RDKitMol with 3D conformer saved with the molecule. - keep_ids (dict): Dictionary of which opts succeeded and which failed - energies (dict): Dictionary of energies for each conformer + keep_ids (dict): Dictionary of which opts succeeded and which failed. + energies (dict): Dictionary of energies for each conformer. """ # Save optimized stable species mols path = os.path.join(save_dir, "optimized_confs.sdf") @@ -286,9 +365,11 @@ def __call__(self, **kwargs): """ Run the workflow to generate optimize stable species guesses. + Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. - save_dir (str, optional): The path to save results. Defaults to None. + save_dir (str, optional): The path to save results. Defaults to ``None``. + Returns: 'RDKitMol': The optimized molecule as RDKitMol with 3D geometries embedded. """ diff --git a/rdmc/conformer_generation/ts_optimizers.py b/rdmc/conformer_generation/ts_optimizers.py index 47e61b59..edc4b95c 100644 --- a/rdmc/conformer_generation/ts_optimizers.py +++ b/rdmc/conformer_generation/ts_optimizers.py @@ -2,7 +2,7 @@ #-*- coding: utf-8 -*- """ -Modules for optimizing transition state geometries +Modules for optimizing transition state geometries. 
""" # Import RDKit and RDMC first to avoid unexpected errors @@ -32,11 +32,17 @@ class TSOptimizer: """ The abstract class for TS optimizer. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, track_stats: Optional[bool] = False): """ Initialize the TS optimizer. + + Args: + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ self.track_stats = track_stats self.n_failures = None @@ -50,15 +56,15 @@ def optimize_ts_guesses(self, **kwargs): """ The abstract method for optimizing TS guesses. It will be implemented in actual classes. - The method needs to take `mol` in `RDKitMol` and `save_dir` as `str` as input arguments, and - return the optimized molecule as `RDKitMol`. + The method needs to take ``mol`` in ``RDKitMol`` and ``save_dir`` as ``str`` as input arguments, and + return the optimized molecule as ``RDKitMol``. Args: mol (RDKitMol): The TS in RDKitMol object with geometries embedded as conformers. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule. """ raise NotImplementedError @@ -74,8 +80,8 @@ def save_opt_mols(self, Args: save_dir (str): The path to the directory to save the results. opt_mol (RDKitMol): The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule. - keep_ids (dict): Dictionary of which opts succeeded and which failed - energies (dict): Dictionary of energies for each conformer + keep_ids (dict): Dictionary of which opts succeeded and which failed. + energies (dict): Dictionary of energies for each conformer. 
""" # Save optimized ts mols ts_path = os.path.join(save_dir, "ts_optimized_confs.sdf") @@ -102,10 +108,10 @@ def __call__(self, Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. - save_dir (str, optional): The path to save results. Defaults to None. + save_dir (str, optional): The path to save results. Defaults to ``None``. Returns: - 'RDKitMol': The optimized molecule as RDKitMol with 3D geometries embedded. + 'RDKitMol': The optimized TS molecule as ``RDKitMol`` with 3D geometries embedded. """ time_start = time() @@ -126,6 +132,13 @@ class SellaOptimizer(TSOptimizer): """ The class to optimize TS geometries using the Sella algorithm. It uses XTB as the backend calculator, ASE as the interface, and Sella module from the Sella repo. + + Args: + method (str, optional): The method in XTB used to optimize the geometry. Options are + ``'GFN1-xTB'`` and ``'GFN2-xTB'``. Defaults to ``"GFN2-xTB"``. + fmax (float, optional): The force threshold used in the optimization. Defaults to ``1e-3``. + steps (int, optional): Max number of steps allowed in the optimization. Defaults to ``1000``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, method: str = "GFN2-xTB", @@ -156,10 +169,10 @@ def optimize_ts_guesses(self, Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. - save_dir (str, optional): The path to save results. Defaults to None. + save_dir (str, optional): The path to save results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule. """ opt_mol = mol.Copy(copy_attrs=["KeepIDs"]) opt_mol.energy = {} @@ -192,7 +205,14 @@ def optimize_ts_guesses(self, class OrcaOptimizer(TSOptimizer): """ The class to optimize TS geometries using the Berny algorithm built in Orca. 
- You have to have the Orca package installed to run this optimizer + You have to have the Orca package installed to run this optimizer. + + Args: + method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Orca. + If you want to use XTB methods, you need to put the xtb binary into the Orca directory. + Defaults to ``"XTB2"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, method: str = "XTB2", @@ -204,9 +224,10 @@ def __init__(self, Args: method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Orca. - If you want to use XTB methods, you need to put the xtb binary into the Orca directory. Defaults to XTB2. - nprocs (int, optional): The number of processors to use. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + If you want to use XTB methods, you need to put the xtb binary into the Orca directory. + Defaults to ``"XTB2"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(OrcaOptimizer, self).__init__(track_stats) @@ -219,13 +240,18 @@ def __init__(self, else: self.orca_binary = ORCA_BINARY - def extract_frequencies(self, save_dir, n_atoms): + def extract_frequencies(self, + save_dir: str, + n_atoms: int): """ Extract frequencies from the Orca opt job. Args: save_dir (str): Path where Orca logs are saved. n_atoms (int): The number of atoms in the molecule. + + Returns: + np.ndarray: The frequencies in cm-1. """ log_file = os.path.join(save_dir, "orca_opt.log") @@ -256,11 +282,11 @@ def optimize_ts_guesses(self, Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. 
- multiplicity (int): The multiplicity of the molecule. Defaults to 1. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int): The multiplicity of the molecule. Defaults to ``1``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule. """ opt_mol = mol.Copy(quickCopy=True, copy_attrs=["KeepIDs"]) opt_mol.energy = {} # TODO: add orca energies @@ -323,6 +349,14 @@ class GaussianOptimizer(TSOptimizer): """ The class to optimize TS geometries using the Berny algorithm built in Gaussian. You have to have the Gaussian package installed to run this optimizer + + Args: + method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Gaussian. + We provided a script to run XTB using Gaussian, but there are some extra steps to do. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, @@ -335,10 +369,11 @@ def __init__(self, Args: method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Gaussian. - We provided a script to run XTB using Gaussian, but there are some extra steps to do. Defaults to GFN2-xTB. - nprocs (int, optional): The number of processors to use. Defaults to 1. - memory (int, optional): Memory in GB used by Gaussian. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + We provided a script to run XTB using Gaussian, but there are some extra steps to do. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. 
+ memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(GaussianOptimizer, self).__init__(track_stats) @@ -365,11 +400,11 @@ def optimize_ts_guesses(self, Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. - multiplicity (int): The multiplicity of the molecule. Defaults to 1. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int): The multiplicity of the molecule. Defaults to ``1``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule. """ opt_mol = mol.Copy(quickCopy=True, copy_attrs=["KeepIDs"]) opt_mol.energy = {} @@ -432,8 +467,16 @@ def optimize_ts_guesses(self, class QChemOptimizer(TSOptimizer): """ - The class to optimize TS geometries using the Baker’s eigenvector-following (EF) algorithm built in QChem. - You have to have the QChem package installed to run this optimizer + The class to optimize TS geometries using the Baker's eigenvector-following (EF) algorithm built in QChem. + You have to have the QChem package installed to run this optimizer. + + Args: + method (str, optional): The method to be used for TS optimization. You can use the method available in QChem. + Defaults to ``"wB97x-d3"``. + basis (str, optional): The basis set to be used for TS optimization. You can use the basis available in QChem. + Defaults to ``"def2-tzvp"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, @@ -442,13 +485,15 @@ def __init__(self, nprocs: int = 1, track_stats: bool = False): """ - Initiate the QChem berny optimizer. + Initiate the QChem EF optimizer.
Args: - method (str, optional): The method to be used for TS optimization. you can use the method available in QChem. Defaults to wB97x-d3. - basis (str, optional): The method to be used for TS optimization. you can use the basis available in QChem. Defaults to def2-tzvp. - nprocs (int, optional): The number of processors to use. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + method (str, optional): The method to be used for TS optimization. You can use the method available in QChem. + Defaults to ``"wB97x-d3"``. + basis (str, optional): The basis set to be used for TS optimization. You can use the basis available in QChem. + Defaults to ``"def2-tzvp"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(QChemOptimizer, self).__init__(track_stats) @@ -472,11 +517,11 @@ def optimize_ts_guesses(self, Args: mol (RDKitMol): An RDKitMol object with all guess geometries embedded as conformers. - multiplicity (int): The multiplicity of the molecule. Defaults to 1. - save_dir (Optional[str], optional): The path to save the results. Defaults to None. + multiplicity (int): The multiplicity of the molecule. Defaults to ``1``. + save_dir (Optional[str], optional): The path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The optimized TS molecule in RDKitMol with 3D conformer saved with the molecule.
""" opt_mol = mol.Copy(quickCopy=True, copy_attrs=["KeepIDs"]) opt_mol.energy = {} From 2f3d8f65808ffe712c96e7252020da8c9a1c4ca2 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 14:02:21 -0400 Subject: [PATCH 19/21] Add docs for utils in conformer_generation Add rst file and clean the docstrings --- .../reference/conformer_generation/index.rst | 2 +- .../reference/conformer_generation/utils.rst | 7 ++ rdmc/conformer_generation/utils.py | 84 ++++++++++++------- 3 files changed, 64 insertions(+), 29 deletions(-) create mode 100644 docs/source/reference/conformer_generation/utils.rst diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index dd034f03..71ca2734 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -11,4 +11,4 @@ rdmc.conformer_generation embedding_geometries/index geometry_optimization/index - + utils diff --git a/docs/source/reference/conformer_generation/utils.rst b/docs/source/reference/conformer_generation/utils.rst new file mode 100644 index 00000000..8d35d4de --- /dev/null +++ b/docs/source/reference/conformer_generation/utils.rst @@ -0,0 +1,7 @@ +Utils +===================================== + +.. automodule:: rdmc.conformer_generation.utils + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/rdmc/conformer_generation/utils.py b/rdmc/conformer_generation/utils.py index a3add713..b3fc8d1b 100644 --- a/rdmc/conformer_generation/utils.py +++ b/rdmc/conformer_generation/utils.py @@ -2,7 +2,7 @@ #-*- coding: utf-8 -*- """ -Utilities for conformer generation modules +Utilities for conformer generation modules. 
""" from rdkit.Chem import AllChem @@ -12,25 +12,27 @@ import pickle import numpy as np from collections import defaultdict -from typing import Union +from typing import List, Optional, Union from rdmc.utils import PERIODIC_TABLE as PT from rdmc.external.logparser import GaussianLog -def mol_to_dict(mol, +def mol_to_dict(mol: 'RDKitMol', copy: bool = True, - iter: int = None, - conf_copy_attrs: list = None): + iter: Optional[int] = None, + conf_copy_attrs: Optional[list] = None, + ) -> list[dict]: """ Convert a molecule to a dictionary that stores its conformers object, atom coordinates, and iteration numbers for a certain calculation (optional). Args: mol ('RDKitMol'): An RDKitMol object. - copy (bool, optional): Use a copy of the molecule to process data. Defaults to True. - iter (int, optional): Number of iterations. Defaults to None. + copy (bool, optional): Use a copy of the molecule to process data. Defaults to ``True``. + iter (int, optional): Number of iterations. Defaults to ``None``. conf_copy_attrs (list, optional): Conformer-level attributes to copy to the dictionary. + Defaults to ``None``, which means no attributes will be copied. Returns: list: mol data as a list of dict; each dict corresponds to a conformer. @@ -52,18 +54,19 @@ def mol_to_dict(mol, return mol_data -def dict_to_mol(mol_data, - conf_copy_attrs: list = None): +def dict_to_mol(mol_data: List[dict], + conf_copy_attrs: Optional[list] = None): """ Convert a dictionary that stores its conformers object, atom coordinates, and conformer-level attributes to an RDKitMol. The method assumes that the first conformer's owning mol contains the conformer-level attributes, which are extracted through the Copy function (this should be the case if the - dictionary was generated with the mol_to_dict function). + dictionary was generated with the :obj:`mol_to_dict` function). Args: - mol_data (list) List containing dictionaries of data entries for each conformer. 
+ mol_data (list): A list containing dictionaries of data entries for each conformer. conf_copy_attrs (list, optional): Conformer-level attributes to copy to the mol. + Defaults to ``None``, which means no attributes will be copied. Returns: mol ('RDKitMol'): An RDKitMol object. @@ -75,7 +78,19 @@ def dict_to_mol(mol_data, return mol -def cluster_confs(mol, cutoff=1.0): +def cluster_confs(mol: 'RDKitMol', + cutoff: float = 1.0, + ) -> 'RDKitMol': + """ + Cluster conformers of a molecule based on RMSD. + + Args: + mol ('RDKitMol'): An RDKitMol object. + cutoff (float, optional): The cutoff for clustering. Defaults to ``1.0``. + + Returns: + mol ('RDKitMol'): An RDKitMol object with clustered conformers. + """ rmsmat = AllChem.GetConformerRMSMatrix(mol.ToRWMol(), prealigned=False) num = mol.GetNumConformers() clusters = Butina.ClusterData(rmsmat, num, cutoff, isDistData=True, reordering=True) @@ -87,17 +102,21 @@ def cluster_confs(mol, cutoff=1.0): return updated_mol -def get_conf_failure_mode(rxn_dir, pruner=True): +def get_conf_failure_mode(rxn_dir: str, + pruner: bool = True, + ) -> dict: """ Parse a reaction directory for a TS generation run and extract failure modes (which conformer failed the - full workflow and for what reason) + full workflow and for what reason). Args: rxn_dir (str) Path to the reaction directory. - pruner (bool: Optional) Whether or not pruner was used during workflow + pruner (bool: Optional) Whether or not pruner was used during workflow. Defaults to ``True``. Returns: failure_dict ('dict'): Dictionary of conformer ids mapped to the corresponding failure mode. + the ``failure_mode`` can be one of the following: + ``opt``, ``prune``, ``freq``, ``irc``, ``workflow``, ``none``. 
""" failure_modes = { @@ -133,21 +152,24 @@ def get_conf_failure_mode(rxn_dir, pruner=True): return failure_dict -def get_frames_from_freq(log, +def get_frames_from_freq(log: GaussianLog, amplitude: float = 1.0, num_frames: int = 10, - weights: Union[bool, np.array] = False): + weights: Union[bool, np.array] = False, + ) -> (np.array,np.array): """ + Get the reaction mode as frames from a TS optimization log file. + Args: log (GaussianLog): A gaussian log object with vibrational freq calculated. amplitude (float): The amplitude of the motion. If a single value is provided then the guess - will be unique (if available). 0.25 will be the default. Otherwise, a list + will be unique (if available). ``0.25`` is the default. Otherwise, a list can be provided, and all possible results will be returned. - num_frames (int): The number of frames in each direction (forward and reverse). Defaults to 10. + num_frames (int): The number of frames in each direction (forward and reverse). Defaults to ``10``. weights (bool or np.array): If ``True``, use the sqrt(atom mass) as a scaling factor to the displacement. - If ``False``, use the identity weights. If a N x 1 ``np.array` is provided, then - The concern is that light atoms (e.g., H) tend to have larger motions - than heavier atoms. + If ``False``, use the identity weights. If a N x 1 ``np.array`` is provided, then + The concern is that light atoms (e.g., H) tend to have larger motions + than heavier atoms. Returns: np.array: The atomic numbers as an 1D array @@ -174,18 +196,24 @@ def get_frames_from_freq(log, def convert_log_to_mol(log_path: str, amplitude: float = 1.0, num_frames: int = 10, - weights: Union[bool, np.array] = False): + weights: Union[bool, np.array] = False, + ) -> Union[None,'RDKitMol']: """ + Convert a TS optimization log file to an RDKitMol object with conformers. + Args: log_path (str): The path to the log file. amplitude (float): The amplitude of the motion. 
If a single value is provided then the guess - will be unique (if available). 0.25 will be the default. Otherwise, a list + will be unique (if available). ``0.25`` is the default. Otherwise, a list can be provided, and all possible results will be returned. - num_frames (int): The number of frames in each direction (forward and reverse). Defaults to 10. + num_frames (int): The number of frames in each direction (forward and reverse). Defaults to ``10``. weights (bool or np.array): If ``True``, use the sqrt(atom mass) as a scaling factor to the displacement. - If ``False``, use the identity weights. If a N x 1 ``np.array` is provided, then - The concern is that light atoms (e.g., H) tend to have larger motions - than heavier atoms. + If ``False``, use the identity weights. If a N x 1 ``np.array`` is provided, then + The concern is that light atoms (e.g., H) tend to have larger motions + than heavier atoms. + + Returns: + mol ('RDKitMol'): An RDKitMol object. """ glog = GaussianLog(log_path) From b4cef53091944bd8207512a227f9bb2aad7218f8 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 15:00:56 -0400 Subject: [PATCH 20/21] Add docs for conformer generation postprocessings 1. docs are added for pruners, verifiers, ts_verifiers, and solvation 2. 
relevant docstrings are cleaned --- .../reference/conformer_generation/index.rst | 1 + .../postprocessing/index.rst | 10 + .../postprocessing/pruners.rst | 7 + .../postprocessing/solvation.rst | 7 + .../postprocessing/ts_verifiers.rst | 7 + .../postprocessing/verifiers.rst | 7 + rdmc/conformer_generation/pruners.py | 212 ++++++++++++++++-- rdmc/conformer_generation/solvation.py | 53 +++-- rdmc/conformer_generation/ts_verifiers.py | 138 ++++++++---- rdmc/conformer_generation/verifiers.py | 41 ++-- 10 files changed, 382 insertions(+), 101 deletions(-) create mode 100644 docs/source/reference/conformer_generation/postprocessing/index.rst create mode 100644 docs/source/reference/conformer_generation/postprocessing/pruners.rst create mode 100644 docs/source/reference/conformer_generation/postprocessing/solvation.rst create mode 100644 docs/source/reference/conformer_generation/postprocessing/ts_verifiers.rst create mode 100644 docs/source/reference/conformer_generation/postprocessing/verifiers.rst diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index 71ca2734..4faf5535 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -11,4 +11,5 @@ rdmc.conformer_generation embedding_geometries/index geometry_optimization/index + postprocessing/index utils diff --git a/docs/source/reference/conformer_generation/postprocessing/index.rst b/docs/source/reference/conformer_generation/postprocessing/index.rst new file mode 100644 index 00000000..6ea4e87e --- /dev/null +++ b/docs/source/reference/conformer_generation/postprocessing/index.rst @@ -0,0 +1,10 @@ +Postprocessing +===================== + +.. 
toctree:: + :maxdepth: 2 + + verifiers + ts_verifiers + pruners + solvation diff --git a/docs/source/reference/conformer_generation/postprocessing/pruners.rst b/docs/source/reference/conformer_generation/postprocessing/pruners.rst new file mode 100644 index 00000000..ce55c71f --- /dev/null +++ b/docs/source/reference/conformer_generation/postprocessing/pruners.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.pruners +======================================= + +.. automodule:: rdmc.conformer_generation.pruners + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/postprocessing/solvation.rst b/docs/source/reference/conformer_generation/postprocessing/solvation.rst new file mode 100644 index 00000000..90badf74 --- /dev/null +++ b/docs/source/reference/conformer_generation/postprocessing/solvation.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.solvation +======================================= + +.. automodule:: rdmc.conformer_generation.solvation + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/postprocessing/ts_verifiers.rst b/docs/source/reference/conformer_generation/postprocessing/ts_verifiers.rst new file mode 100644 index 00000000..bfd5ec12 --- /dev/null +++ b/docs/source/reference/conformer_generation/postprocessing/ts_verifiers.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.ts_verifiers +======================================= + +.. 
automodule:: rdmc.conformer_generation.ts_verifiers + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/postprocessing/verifiers.rst b/docs/source/reference/conformer_generation/postprocessing/verifiers.rst new file mode 100644 index 00000000..e236948e --- /dev/null +++ b/docs/source/reference/conformer_generation/postprocessing/verifiers.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.verifiers +======================================= + +.. automodule:: rdmc.conformer_generation.verifiers + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/rdmc/conformer_generation/pruners.py b/rdmc/conformer_generation/pruners.py index 430ec43b..190f23cd 100644 --- a/rdmc/conformer_generation/pruners.py +++ b/rdmc/conformer_generation/pruners.py @@ -5,9 +5,12 @@ Modules for pruning a group of conformers """ -from rdmc.mol import RDKitMol -import numpy as np from time import time +from typing import List, Optional + +import numpy as np + +from rdmc.mol import RDKitMol try: from rdmc.external.xtb_tools.crest import run_cre_check except ImportError: @@ -15,7 +18,14 @@ class ConfGenPruner: - def __init__(self, track_stats=False): + """ + Base class for conformer pruning. + + Args: + track_stats (bool, optional): Whether to track statistics. Defaults to ``False``. + """ + def __init__(self, + track_stats: bool = False): self.iter = 0 self.track_stats = track_stats @@ -24,11 +34,42 @@ def __init__(self, track_stats=False): self.n_output_confs = None self.stats = [] - def prune_conformers(self, current_mol_data, unique_mol_data=None, sort_by_energy=True, return_ids=False): + def prune_conformers(self, + current_mol_data: List[dict], + unique_mol_data: Optional[List[dict]] = None, + sort_by_energy: bool = True, + return_ids: bool = False): + """ + Prune conformers. + + Args: + current_mol_data (list[dict]): conformer data of the current iteration. 
+ unique_mol_data (list[dict], optional): Unique conformer data of previous iterations. Defaults to ``None``. + sort_by_energy (bool, optional): Whether to sort conformers by energy. Defaults to ``True``. + return_ids (bool, optional): Whether to return conformer IDs. Defaults to ``False``. + + Raises: + NotImplementedError: This method should be implemented in the subclass. + """ raise NotImplementedError - def __call__(self, current_mol_data, unique_mol_data=None, sort_by_energy=True, return_ids=False): - + def __call__(self, + current_mol_data: List[dict], + unique_mol_data: Optional[List[dict]] = None, + sort_by_energy: bool = True, + return_ids: bool = False): + """ + Execute the task of pruning conformers. + + Args: + current_mol_data (list[dict]): conformer data of the current iteration. + unique_mol_data (list[dict], optional): Unique conformer data of previous iterations. Defaults to ``None``. + sort_by_energy (bool, optional): Whether to sort conformers by energy. Defaults to ``True``. + return_ids (bool, optional): Whether to return conformer IDs. Defaults to ``False``. + + Returns: + list[dict]: Updated conformer data. + """ self.iter += 1 time_start = time() mol_data = self.prune_conformers(current_mol_data, unique_mol_data, sort_by_energy, return_ids) @@ -51,20 +92,42 @@ class TorsionPruner(ConfGenPruner): Prune conformers based on torsion angle criteria. This method uses a mean and max criteria to prune conformers: A conformer is considered unique if it satisfies either of the following criteria: - mean difference of all torsion angles > mean_chk_threshold - max difference of all torsion angles > max_chk_threshold - New conformers are compared to all conformers that have already been deemed unique + + - mean difference of all torsion angles > mean_chk_threshold + - max difference of all torsion angles > max_chk_threshold + + New conformers are compared to all conformers that have already been deemed unique. 
+ + Args: + mean_chk_threshold (float, optional): Mean difference threshold. Defaults to ``10.``. + max_chk_threshold (float, optional): Max difference threshold. Defaults to ``20.``. + track_stats (bool, optional): Whether to track statistics. Defaults to ``False``. """ - def __init__(self, mean_chk_threshold=10, max_chk_threshold=20, track_stats=False): + def __init__(self, + mean_chk_threshold: float = 10., + max_chk_threshold: float = 20., + track_stats: bool = False): super(TorsionPruner, self).__init__(track_stats) self.mean_chk_threshold = mean_chk_threshold self.max_chk_threshold = max_chk_threshold self.torsions_list = None - def initialize_torsions_list(self, smiles=None, torsions=None, excludeMethyl=False): - + def initialize_torsions_list(self, + smiles: Optional[str] = None, + torsions: Optional[list] = None, + excludeMethyl: bool = False): + """ + Initialize the list of torsions to be used for comparison and pruning. + + Args: + smiles (str, optional): SMILES of the molecule. Defaults to ``None``. This should be provided if + ``torsions`` is not provided. + torsions (list, optional): List of torsions. Defaults to ``None``, + in which case the torsions will be extracted from the molecule. + excludeMethyl (bool, optional): Whether to exclude methyl groups. Defaults to ``False``. + """ if torsions: self.torsions_list = torsions elif smiles: @@ -73,7 +136,20 @@ def initialize_torsions_list(self, smiles=None, torsions=None, excludeMethyl=Fal else: raise ValueError("Either a SMILES or a list of torsional modes should be provided.") - def initialize_ts_torsions_list(self, rxn_smiles=None, torsions=None, excludeMethyl=False): + def initialize_ts_torsions_list(self, + rxn_smiles: Optional[str] = None, + torsions: Optional[list] = None, + excludeMethyl: bool = False): + """ + Initialize the list of torsions to be used for comparison and pruning for TS molecules. + + Args: + rxn_smiles (str, optional): SMILES of the reaction. Defaults to ``None``. 
This should be provided if + ``torsions`` is not provided. + torsions (list, optional): List of torsions. Defaults to ``None``, in which case the torsions will be + extracted according to the reactants and the products. + excludeMethyl (bool, optional): Whether to exclude methyl groups. Defaults to ``False``. + """ if torsions: self.torsions_list = torsions @@ -87,26 +163,74 @@ def initialize_ts_torsions_list(self, rxn_smiles=None, torsions=None, excludeMet else: raise ValueError("Either a SMILES or a list of torsional modes should be provided.") - def calculate_torsions(self, mol_data): + def calculate_torsions(self, + mol_data: List[dict], + ) -> List[dict]: + """ + Calculate torsions for a list of conformers. + Args: + mol_data (list[dict]): conformer data. + + Returns: + list[dict]: Conformer data with values of torsions added. + """ for conf_data in mol_data: conf = conf_data["conf"] torsions = np.array([conf.GetTorsionDeg(t) for t in self.torsions_list]) % 360 conf_data.update({"torsions": torsions}) return mol_data - def rad_angle_compare(self, x, y): + @staticmethod + def rad_angle_compare(x: float, + y: float, + ) -> float: + """ + Compare two angles in radians. + + Args: + x (float): angle in degrees. + y (float): angle in degrees. - # compare angles in radians + Returns: + float: Absolute difference between the two angles in radians. + """ return np.abs(np.arctan2(np.sin(x - y), np.cos(x - y))) * 180 / np.pi - def torsion_list_compare(self, c1_ts, c2_ts): + @staticmethod + def torsion_list_compare(c1_ts: List[float], + c2_ts: List[float], + ) -> list[float]: + """ + Compare two lists of torsions in radians. - # compare two lists of torsions in radians - return [self.rad_angle_compare(t1, t2) for t1, t2 in zip(c1_ts, c2_ts)] - - def prune_conformers(self, current_mol_data, unique_mol_data=None, sort_by_energy=True, return_ids=False): + Args: + c1_ts (list): list of torsions in degrees. + c2_ts (list): list of torsions in degrees.
+ Returns: + list: Absolute difference between the two lists of torsions in radians. + """ + # compare two lists of torsions in radians + return [TorsionPruner.rad_angle_compare(t1, t2) for t1, t2 in zip(c1_ts, c2_ts)] + + def prune_conformers(self, + current_mol_data: List[dict], + unique_mol_data: Optional[List[dict]] = None, + sort_by_energy: bool = True, + return_ids: bool = False): + """ + Prune conformers. + + Args: + current_mol_data (list[dict]): conformer data of the current iteration. + unique_mol_data (list[dict], optional): Unique conformer data of previous iterations. Defaults to ``None``. + sort_by_energy (bool, optional): Whether to sort conformers by energy. Defaults to ``True``. + return_ids (bool, optional): Whether to return conformer IDs. Defaults to ``False``. + + Returns: + list[dict]: Updated conformer data. + """ if unique_mol_data is None: unique_mol_data = [] @@ -153,7 +277,32 @@ def prune_conformers(self, current_mol_data, unique_mol_data=None, sort_by_energ class CRESTPruner(ConfGenPruner): - def __init__(self, ethr=0.15, rthr=0.125, bthr=0.01, ewin=10000, track_stats=False): + """ + Prune conformers using CREST. + + Args: + ethr (float, optional): Energy threshold. Defaults to ``0.15``. + rthr (float, optional): RMSD threshold. Defaults to ``0.125``. + bthr (float, optional): Bond threshold. Defaults to ``0.01``. + ewin (int, optional): Energy window. Defaults to ``10000``. + track_stats (bool, optional): Whether to track statistics. Defaults to ``False``. + """ + def __init__(self, + ethr: float = 0.15, + rthr: float = 0.125, + bthr: float = 0.01, + ewin: float = 10000, + track_stats: bool = False): + """ + Initialize the CREST pruner. + + Args: + ethr (float, optional): Energy threshold. Defaults to ``0.15``. + rthr (float, optional): RMSD threshold. Defaults to ``0.125``. + bthr (float, optional): Bond threshold. Defaults to ``0.01``. + ewin (int, optional): Energy window. Defaults to ``10000``. 
+ track_stats (bool, optional): Whether to track statistics. Defaults to ``False``. + """ super(CRESTPruner, self).__init__(track_stats) self.ethr = ethr @@ -161,8 +310,23 @@ def __init__(self, ethr=0.15, rthr=0.125, bthr=0.01, ewin=10000, track_stats=Fal self.bthr = bthr self.ewin = ewin - def prune_conformers(self, current_mol_data, unique_mol_data=None, sort_by_energy=True, return_ids=False): - + def prune_conformers(self, + current_mol_data: List[dict], + unique_mol_data: Optional[List[dict]] = None, + sort_by_energy: bool = True, + return_ids: bool = False): + """ + Prune conformers. + + Args: + current_mol_data (list[dict]): conformer data of the current iteration. + unique_mol_data (list[dict], optional): Unique conformer data of previous iterations. Defaults to ``None``. + sort_by_energy (bool, optional): Whether to sort conformers by energy. Defaults to ``True``. + return_ids (bool, optional): Whether to return conformer IDs. Defaults to ``False``. + + Returns: + list[dict]: Updated conformer data. + """ if unique_mol_data is None: unique_mol_data = [] diff --git a/rdmc/conformer_generation/solvation.py b/rdmc/conformer_generation/solvation.py index 7948596c..5457ea18 100644 --- a/rdmc/conformer_generation/solvation.py +++ b/rdmc/conformer_generation/solvation.py @@ -2,16 +2,17 @@ #-*- coding: utf-8 -*- """ -Modules for including solvation corrections +Modules for computing solvation corrections. """ import os -from typing import Optional -from ase import Atoms from time import time -import torch +from typing import List, Optional + +from ase import Atoms try: + import torch from conf_solv.trainer import LitConfSolvModule from conf_solv.dataloaders.collate import Collater from conf_solv.dataloaders.loader import create_pairdata, MolGraph @@ -22,43 +23,50 @@ class Estimator: """ The abstract class for energy estimator. + + Args: + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``.
""" def __init__(self, track_stats: Optional[bool] = False): """ Initialize the TS optimizer. + + Args: + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``. """ self.track_stats = track_stats self.stats = [] def predict_energies(self, - mol_data: dict, + mol_data: List[dict], **kwargs): """ The abstract method for predicting energies. It will be implemented in actual classes. - The method needs to take `mol_data` which is a dictionary containing info about the - conformers of the moelcule. It will return the molecule as the same 'mol_data' object + The method needs to take ``mol_data`` which is a dictionary containing info about the + conformers of the molecules. It will return the molecule as the same ``mol_data`` object with the energy values altered. Args: - mol_data (list): A list of molecule dictionaries. + mol_data (List[dict]): A list of molecule dictionaries. Returns: - mol_data + mol_data (List[dict]): A list of molecule dictionaries with energy values updated. """ raise NotImplementedError def __call__(self, - mol_data: dict, - **kwargs): + mol_data: List[dict], + **kwargs, + ) -> List[dict]: """ - Run the workflow to predict energies. + Run the workflow to predict solvation energies. Args: - mol_data (list): A list of molecule dictionaries. + mol_data (List[dict]): A list of molecule dictionaries. Returns: - mol_data + mol_data (List[dict]): A list of molecule dictionaries with energy values updated. """ time_start = time() updated_mol_data = self.predict_energies(mol_data=mol_data, **kwargs) @@ -77,6 +85,10 @@ def __call__(self, class ConfSolv(Estimator): """ Class for estimating conformer energies in solution with neural networks. + + Args: + trained_model_dir (str): The path to the directory storing the trained ConfSolv model. + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``. 
""" def __init__(self, @@ -87,7 +99,7 @@ def __init__(self, Args: trained_model_dir (str): The path to the directory storing the trained ConfSolv model. - track_stats (bool, optional): Whether to track timing stats. Defaults to False. + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``. """ super(ConfSolv, self).__init__(track_stats) @@ -99,16 +111,17 @@ def __init__(self, self.collater = Collater(follow_batch=["x_solvent", "x_solute"], exclude_keys=None) def predict_energies(self, - mol_data: list, - **kwargs): + mol_data: List[dict], + **kwargs, + ) -> List[dict]: """ - Predict conformer free energies in given solvent. + Predict conformer free energies in a given solvent. Args: - mol_data (list): A list of molecule dictionaries. + mol_data (List[dict]): A list of molecule dictionaries. Returns: - mol_data + mol_data (List[dict]): A list of molecule dictionaries with energy values updated. """ # prepare inputs syms = [a.GetSymbol() for a in mol_data[0]['conf'].ToMol().GetAtoms()] diff --git a/rdmc/conformer_generation/ts_verifiers.py b/rdmc/conformer_generation/ts_verifiers.py index 0b9c0455..9c25f54a 100644 --- a/rdmc/conformer_generation/ts_verifiers.py +++ b/rdmc/conformer_generation/ts_verifiers.py @@ -37,6 +37,9 @@ class TSVerifier: """ The abstract class for TS verifiers. + + Args: + track_stats (bool, optional): Whether to track status. Defaults to ``False``. """ def __init__(self, track_stats: bool = False): @@ -44,7 +47,7 @@ def __init__(self, Initialize the TS verifier. Args: - track_stats (bool, optional): Whether to track status. Defaults to False. + track_stats (bool, optional): Whether to track status. Defaults to ``False``. """ self.track_stats = track_stats self.n_failures = None @@ -59,16 +62,15 @@ def verify_ts_guesses(self, **kwargs): """ The abstract method for verifying TS guesses (or optimized TS geometries). 
The method need to take - `ts_mol` in RDKitMol, `keep_ids` in list, `multiplicity` in int, and `save_dir` in str, and returns - a list indicating the ones passing the check. + ``ts_mol`` in ``RDKitMol``, ``keep_ids`` in ``list``, ``multiplicity`` in ``int``, and ``save_dir`` in ``str``. Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (str, optional): The directory path to save the results. Defaults to ``None``. Raises: - NotImplementedError + NotImplementedError: This method needs to be implemented in the subclass. """ raise NotImplementedError @@ -76,17 +78,18 @@ def __call__(self, ts_mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> 'RDKitMol': """ Run the workflow for verifying the TS guessers (or optimized TS conformers). Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (str, optional): The directory path to save the results. Defaults to ``None``. Returns: - list: a list of true and false + RDKitMol: The TS in RDKitMol object with verification results stored in ``KeepIDs``. """ time_start = time() ts_mol = self.verify_ts_guesses( @@ -107,18 +110,24 @@ def __call__(self, class XTBFrequencyVerifier(TSVerifier): """ The class for verifying the TS by calculating and checking its frequencies using XTB. 
+ + Args: + cutoff_frequency (float, optional): Cutoff frequency above which a frequency does not correspond to a TS + imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations. + Defaults to ``-100.`` cm-1. + track_stats (bool, optional): Whether to track stats. Defaults to ``False``. """ def __init__(self, - cutoff_frequency: int = -100, + cutoff_frequency: float = -100., track_stats: bool = False): """ Initiate the XTB frequency verifier. Args: - cutoff_frequency (int, optional): Cutoff frequency above which a frequency does not correspond to a TS - imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations - (defaults to -100 cm-1) - track_stats (bool, optional): Whether to track stats. Defaults to False. + cutoff_frequency (float, optional): Cutoff frequency above which a frequency does not correspond to a TS + imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations. + Defaults to ``-100.`` cm-1. + track_stats (bool, optional): Whether to track stats. Defaults to ``False``. """ super(XTBFrequencyVerifier, self).__init__(track_stats) @@ -128,17 +137,18 @@ def verify_ts_guesses(self, ts_mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> 'RDKitMol': """ Verifying TS guesses (or optimized TS geometries). Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (str, optional): The directory path to save the results. Defaults to ``None``. Returns: - list + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``.
""" for i in range(ts_mol.GetNumConformers()): if ts_mol.KeepIDs[i]: @@ -162,6 +172,13 @@ def verify_ts_guesses(self, class OrcaIRCVerifier(TSVerifier): """ The class for verifying the TS by calculating and checking its IRC analysis using Orca. + + Args: + method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Orca. + If you want to use XTB methods, you need to put the xtb binary into the Orca directory. + Defaults to ``"XTB2"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, @@ -173,9 +190,10 @@ def __init__(self, Args: method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Orca. - If you want to use XTB methods, you need to put the xtb binary into the Orca directory. Defaults to XTB2. - nprocs (int, optional): The number of processors to use. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + If you want to use XTB methods, you need to put the xtb binary into the Orca directory. + Defaults to ``"XTB2"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(OrcaIRCVerifier, self).__init__(track_stats) @@ -192,14 +210,18 @@ def verify_ts_guesses(self, ts_mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> 'RDKitMol': """ Verifying TS guesses (or optimized TS geometries). Args: - ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + ts_mol (RDKitMol): The TS in RDKitMol object with 3D geometries embedded. 
+ multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (str, optional): The directory path to save the results. Defaults to ``None``. + + Returns: + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``. """ for i in range(ts_mol.GetNumConformers()): if ts_mol.KeepIDs[i]: @@ -270,6 +292,15 @@ def verify_ts_guesses(self, class GaussianIRCVerifier(TSVerifier): """ The class for verifying the TS by calculating and checking its IRC analysis using Gaussian. + + Args: + method (str, optional): The method to be used for TS optimization. you can use the level of theory available in Gaussian. + We provided a script to run XTB using Gaussian, but there are some extra steps to do. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + fc_kw (str, optional): Keyword specifying how often to compute force constants Defaults to ``"calcall"``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, @@ -309,14 +340,18 @@ def verify_ts_guesses(self, ts_mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> RDKitMol: """ Verifying TS guesses (or optimized TS geometries). Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (str, optional): The directory path to save the results. Defaults to ``None``. + + Returns: + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``. 
""" for i in range(ts_mol.GetNumConformers()): if ts_mol.KeepIDs[i]: @@ -404,6 +439,14 @@ def verify_ts_guesses(self, class QChemIRCVerifier(TSVerifier): """ The class for verifying the TS by calculating and checking its IRC analysis using QChem. + + Args: + method (str, optional): The method to be used for TS optimization. you can use the method available in QChem. + Defaults to ``"wB97x-d3"``. + basis (str, optional): The method to be used for TS optimization. you can use the basis available in QChem. + Defaults to ``"def2-tzvp"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ def __init__(self, @@ -415,10 +458,12 @@ def __init__(self, Initiate the QChem IRC verifier. Args: - method (str, optional): The method to be used for TS optimization. you can use the method available in QChem. Defaults to wB97x-d3. - basis (str, optional): The method to be used for TS optimization. you can use the basis available in QChem. Defaults to def2-tzvp. - nprocs (int, optional): The number of processors to use. Defaults to 1. - track_stats (bool, optional): Whether to track the status. Defaults to False. + method (str, optional): The method to be used for TS optimization. you can use the method available in QChem. + Defaults to ``"wB97x-d3"``. + basis (str, optional): The method to be used for TS optimization. you can use the basis available in QChem. + Defaults to ``"def2-tzvp"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + track_stats (bool, optional): Whether to track the status. Defaults to ``False``. """ super(QChemIRCVerifier, self).__init__(track_stats) @@ -442,8 +487,11 @@ def verify_ts_guesses(self, Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. 
- save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the TS. Defaults to ``1``. + save_dir (_type_, optional): The directory path to save the results. Defaults to ``None``. + + Returns: + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``. """ for i in range(ts_mol.GetNumConformers()): if ts_mol.KeepIDs[i]: @@ -528,19 +576,24 @@ def verify_ts_guesses(self, class TSScreener(TSVerifier): """ The class for screening TS guesses using graph neural networks. + + Args: + trained_model_dir (str): The path to the directory storing the trained TS-Screener model. + threshold (float): Threshold prediction at which we classify a failure/success. Defaults to ``0.95``. + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``. """ def __init__(self, trained_model_dir: str, - threshold: Optional[int], + threshold: float = 0.95, track_stats: Optional[bool] = False): """ Initialize the TS-Screener model. Args: trained_model_dir (str): The path to the directory storing the trained TS-Screener model. - threshold (int): Threshold prediction at which we classify a failure/success. - track_stats (bool, optional): Whether to track timing stats. Defaults to False. + threshold (float): Threshold prediction at which we classify a failure/success. Defaults to ``0.95``. + track_stats (bool, optional): Whether to track timing stats. Defaults to ``False``. """ super(TSScreener, self).__init__(track_stats) @@ -558,17 +611,18 @@ def verify_ts_guesses(self, ts_mol: 'RDKitMol', multiplicity: int = 1, save_dir: Optional[str] = None, - **kwargs): + **kwargs, + ) -> 'RDKitMol': """ Screen poor TS guesses by using reacting mode from frequency calculation. Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. multiplicity (int, optional): The spin multiplicity of the TS. Defaults to 1. 
- save_dir (_type_, optional): The directory path to save the results. Defaults to None. + save_dir (str, optional): The directory path to save the results. Defaults to None. Returns: - None + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``. """ rxn_smiles = kwargs["rxn_smiles"] mol_data, ids = [], [] @@ -607,3 +661,5 @@ def verify_ts_guesses(self, # write ids to file with open(os.path.join(save_dir, "screener_check_ids.pkl"), "wb") as f: pickle.dump(ts_mol.KeepIDs, f) + + return ts_mol diff --git a/rdmc/conformer_generation/verifiers.py b/rdmc/conformer_generation/verifiers.py index 7ed89f25..41b062d5 100644 --- a/rdmc/conformer_generation/verifiers.py +++ b/rdmc/conformer_generation/verifiers.py @@ -21,6 +21,9 @@ class Verifier: """ The abstract class for verifiers. + + Args: + track_stats (bool, optional): Whether to track status. Defaults to ``False``. """ def __init__(self, track_stats: bool = False): @@ -28,7 +31,7 @@ def __init__(self, Initialize the verifier. Args: - track_stats (bool, optional): Whether to track status. Defaults to False. + track_stats (bool, optional): Whether to track status. Defaults to ``False``. """ self.track_stats = track_stats self.n_failures = None @@ -43,13 +46,13 @@ def verify_guesses(self, **kwargs): """ The abstract method for verifying guesses (or optimized stable species geometries). The method need to take - `mol` in RDKitMol, `keep_ids` in list, `multiplicity` in int, and `save_dir` in str, and returns - a list indicating the ones passing the check. + ``mol`` in RDKitMol, ``keep_ids`` in list, ``multiplicity`` in int, and ``save_dir`` in str, and returns + a ``list`` indicating the ones passing the check. Args: mol ('RDKitMol'): The stable species in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. 
+ multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to ``1``. + save_dir (_type_, optional): The directory path to save the results. Defaults to ``None``. Raises: NotImplementedError @@ -66,11 +69,11 @@ def __call__(self, Args: mol ('RDKitMol'): The stable species in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to ``1``. + save_dir (_type_, optional): The directory path to save the results. Defaults to ``None``. Returns: - list: a list of true and false + list: a list of ``True`` and ``False`` indicating whether a conformer passes the check. """ time_start = time() mol = self.verify_guesses( @@ -91,18 +94,24 @@ def __call__(self, class XTBFrequencyVerifier(Verifier): """ The class for verifying the stable species by calculating and checking its frequencies using XTB. + + Args: + cutoff_frequency (float, optional): Cutoff frequency above which a frequency does not correspond to a TS + imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations + Defaults to ``-100.`` cm-1. + track_stats (bool, optional): Whether to track stats. Defaults to ``False``. """ def __init__(self, - cutoff_frequency: int = -100, + cutoff_frequency: float = -100., track_stats: bool = False): """ Initiate the XTB frequency verifier. Args: - cutoff_frequency (int, optional): Cutoff frequency above which a frequency does not correspond to a TS - imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations - (defaults to -100 cm-1) - track_stats (bool, optional): Whether to track stats. Defaults to False. 
+ cutoff_frequency (float, optional): Cutoff frequency above which a frequency does not correspond to a TS + imaginary frequency to avoid small magnitude frequencies which correspond to internal bond rotations + Defaults to ``-100.`` cm-1. + track_stats (bool, optional): Whether to track stats. Defaults to ``False``. """ super(XTBFrequencyVerifier, self).__init__(track_stats) @@ -118,11 +127,11 @@ def verify_guesses(self, Args: mol ('RDKitMol'): The stable species in RDKitMol object with 3D geometries embedded. - multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to 1. - save_dir (_type_, optional): The directory path to save the results. Defaults to None. + multiplicity (int, optional): The spin multiplicity of the stable species. Defaults to ``1``. + save_dir (_type_, optional): The directory path to save the results. Defaults to ``None``. Returns: - RDKitMol + RDKitMol: The molecule in RDKitMol object with verification results stored in ``KeepIDs``. """ if mol.GetNumAtoms() != 1: for i in range(mol.GetNumConformers()): From 2a0e19faa1ca69ae90bff6348e5a584560d4199b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Mon, 11 Sep 2023 17:13:50 -0400 Subject: [PATCH 21/21] Add docs to workflow of conformer_generation Add rst files and clean up docstrings --- .../reference/conformer_generation/index.rst | 1 + .../workflow/generators.rst | 7 + .../conformer_generation/workflow/index.rst | 10 + .../conformer_generation/workflow/metrics.rst | 7 + .../conformer_generation/workflow/sampler.rst | 7 + .../workflow/ts_generators.rst | 7 + rdmc/conformer_generation/generators.py | 202 ++++++++++++---- rdmc/conformer_generation/metrics.py | 64 +++-- rdmc/conformer_generation/sampler.py | 219 +++++++++++------- rdmc/conformer_generation/ts_generators.py | 124 +++++++--- 10 files changed, 472 insertions(+), 176 deletions(-) create mode 100644 docs/source/reference/conformer_generation/workflow/generators.rst create mode 100644 
docs/source/reference/conformer_generation/workflow/index.rst create mode 100644 docs/source/reference/conformer_generation/workflow/metrics.rst create mode 100644 docs/source/reference/conformer_generation/workflow/sampler.rst create mode 100644 docs/source/reference/conformer_generation/workflow/ts_generators.rst diff --git a/docs/source/reference/conformer_generation/index.rst b/docs/source/reference/conformer_generation/index.rst index 4faf5535..60321da6 100644 --- a/docs/source/reference/conformer_generation/index.rst +++ b/docs/source/reference/conformer_generation/index.rst @@ -12,4 +12,5 @@ rdmc.conformer_generation embedding_geometries/index geometry_optimization/index postprocessing/index + workflow/index utils diff --git a/docs/source/reference/conformer_generation/workflow/generators.rst b/docs/source/reference/conformer_generation/workflow/generators.rst new file mode 100644 index 00000000..bbe1c678 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/generators.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.generators +===================================== + +.. automodule:: rdmc.conformer_generation.generators + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/index.rst b/docs/source/reference/conformer_generation/workflow/index.rst new file mode 100644 index 00000000..fc3a1b24 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/index.rst @@ -0,0 +1,10 @@ +Workflow +===================== + +.. 
toctree:: + :maxdepth: 2 + + generators + ts_generators + metrics + sampler diff --git a/docs/source/reference/conformer_generation/workflow/metrics.rst b/docs/source/reference/conformer_generation/workflow/metrics.rst new file mode 100644 index 00000000..5ae1cb07 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/metrics.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.metrics +===================================== + +.. automodule:: rdmc.conformer_generation.metrics + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/sampler.rst b/docs/source/reference/conformer_generation/workflow/sampler.rst new file mode 100644 index 00000000..2d783700 --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/sampler.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.sampler +======================================= + +.. automodule:: rdmc.conformer_generation.sampler + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/conformer_generation/workflow/ts_generators.rst b/docs/source/reference/conformer_generation/workflow/ts_generators.rst new file mode 100644 index 00000000..076f0fad --- /dev/null +++ b/docs/source/reference/conformer_generation/workflow/ts_generators.rst @@ -0,0 +1,7 @@ +rdmc.conformer_generation.ts_generators +======================================= + +.. 
automodule:: rdmc.conformer_generation.ts_generators + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/rdmc/conformer_generation/generators.py b/rdmc/conformer_generation/generators.py index 1c6bce59..65bd17e0 100644 --- a/rdmc/conformer_generation/generators.py +++ b/rdmc/conformer_generation/generators.py @@ -4,15 +4,17 @@ """ Modules for conformer generation workflows """ +import logging +from time import time +from typing import Optional + +import numpy as np from rdmc.mol import RDKitMol from .embedders import * from .pruners import * from .optimizers import * from .metrics import * -import numpy as np -import logging -from time import time logging.basicConfig( @@ -26,23 +28,61 @@ class StochasticConformerGenerator: """ A module for stochastic conformer generation. The workflow follows an embed -> optimize -> prune cycle with custom stopping criteria. Additional final modules can be added at the user's discretion. + + Args: + smiles (str): SMILES input for which to generate conformers. + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + estimator (Estimator, optional): Instance of an :obj:`Estimator `. Available option is + :obj:`ConfSolv `. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + min_iters (int, optional): Minimum number of iterations for which to run the module. + max_iters (int, optional): Maximum number of iterations for which to run the module.
+ final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. """ - def __init__(self, smiles, embedder=None, optimizer=None, estimator=None, pruner=None, - metric=None, min_iters=None, max_iters=None, final_modules=None, - config=None, track_stats=False): + def __init__(self, + smiles, + embedder: Optional['ConfGenEmbedder'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, + estimator: Optional['Estimator'] = None, + pruner: Optional['ConfGenPruner'] = None, + metric: Optional['SCGMetric'] = None, + min_iters: Optional[int] = None, + max_iters: Optional[int] = None, + final_modules: Optional[list] = None, + config: Optional[dict] = None, + track_stats: bool = False): """ - Generate an RDKitMol Molecule instance from a RDKit ``Chem.rdchem.Mol`` or ``RWMol`` molecule. + Initialize the StochasticConformerGenerator module. Args: smiles (str): SMILES input for which to generate conformers. - embedder (class): Instance of an embedder from embedders.py. - optimizer (class): Instance of a optimizer from optimizers.py. - estimator (class): Any energy estimator instance. - pruner (class): Instance of a pruner from pruners.py. - metric (class): Instance of a metric from metrics.py. - min_iters (int): Minimum number of iterations for which to run the module (default=5). - max_iters (int}: Maximum number of iterations for which to run the module (default=100). - final_modules (List): List of instances of optimizer/pruner to run after initial cycles complete. + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + estimator (Estimator, optional): Instance of an :obj:`Estimator `. Available option is + :obj:`ConfSolv `. 
+ pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + min_iters (int, optional): Minimum number of iterations for which to run the module. + max_iters (int, optional): Maximum number of iterations for which to run the module. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") @@ -74,8 +114,19 @@ def __init__(self, smiles, embedder=None, optimizer=None, estimator=None, pruner if isinstance(self.pruner, TorsionPruner): self.pruner.initialize_torsions_list(smiles) - def __call__(self, n_conformers_per_iter, **kwargs): + def __call__(self, + n_conformers_per_iter: int, + **kwargs, + ) -> List[dict]: + """ + Run the workflow for stochastic conformer generation. + + Args: + n_conformers_per_iter (int): The number of conformers to generate per iteration. + Returns: + unique_mol_data (List[dict]): A list of dictionaries containing the unique conformers. + """ self.logger.info(f"Generating conformers for {self.smiles}") time_start = time() for _ in range(self.max_iters): @@ -131,9 +182,46 @@ def __call__(self, n_conformers_per_iter, **kwargs): return unique_mol_data - def set_config(self, config, embedder=None, optimizer=None, pruner=None, metric=None, final_modules=None, - min_iters=None, max_iters=None): + def set_config(self, + config: str, + embedder: Optional['ConfGenEmbedder'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, + pruner: Optional['ConfGenPruner'] = None, + metric: Optional['SCGMetric'] = None, + min_iters: Optional[int] = None, + max_iters: Optional[int] = None, + final_modules: Optional[list] = None, + ): + """ + Set the configuration for the conformer generator with pre-defined options: ``"loose"`` and ``"normal"``.
+ Args: + embedder (ConfGenEmbedder, optional): Instance of a :obj:`ConfGenEmbedder `. + Available options are :obj:`ETKDGEmbedder `, + :obj:`GeoMolEmbedder `, and + :obj:`RandomEmbedder `. + Defaults to :obj:`ETKDGEmbedder `. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. Defaults to + :obj:`XTBOptimizer ` with ``"gff"`` method. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. By default, + ``"loose"`` utilizes :obj:`TorsionPruner ` with + ``mean_chk_threshold=20`` and ``max_chk_threshold=30``, and ``"normal"`` utilizes + :obj:`CRESTPruner `. + metric (SCGMetric, optional): The available option is `SCGMetric `. + By default, both modes use ``"entropy"`` metric, while ``"loose"`` with ``window=3`` and ``threshold=0.05``, + and ``"normal"`` with ``window=5`` and ``threshold=0.01``. + min_iters (int, optional): Minimum number of iterations for which to run the module. Defaults to ``3`` for ``"loose"`` and ``5`` for ``"normal"``. + max_iters (int, optional): Maximum number of iterations for which to run the module. Defaults to ``20`` for ``"loose"`` and ``100`` for ``"normal"``. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. By default, no final modules are used for ``"loose"``. + For ``"normal"``, two :obj:`CRESTPruner ` with ``ewin=12`` and ``ewin=6``, are introduced + before and after a :obj:`XTBOptimizer ` with ``"gfn2"`` method + and ``"vtight"`` level.
+ """ if config == "loose": self.embedder = ETKDGEmbedder() if not embedder else embedder self.optimizer = XTBOptimizer(method="gff") if not optimizer else optimizer @@ -156,37 +244,63 @@ def set_config(self, config, embedder=None, optimizer=None, pruner=None, metric= self.min_iters = 5 if not min_iters else min_iters self.max_iters = 100 if not max_iters else max_iters -class ConformerGenerator(): +class ConformerGenerator: + """ + A module for conformer generation. The workflow follows an embed -> optimize -> prune cycle with + custom stopping criteria. Additional final modules can be added at the user's discretion. + + Args: + smiles (str): SMILES input for which to generate conformers. + multiplicity (int, optional): The spin multiplicity of the species. Defaults to ``None``, + which will be interpreted from molecule generated by the `smiles`. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. + pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (Verifier, optional): Instance of a :obj:`Verifier `. + Available option is :obj:`XTBFrequencyVerifier `. + sampler (TorsionalSampler, optional): Instance of a :obj:`TorsionalSampler `. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. 
+ """ def __init__(self, smiles: str, multiplicity: Optional[int] = None, - optimizer: Optional['Optimizer'] = None, + optimizer: Optional['ConfGenOptimizer'] = None, pruner: Optional['ConfGenPruner'] = None, verifiers: Optional[Union['Verifier',List['Verifier']]] = None, sampler: Optional['TorisonalSampler'] = None, - final_modules: Optional[Union['Optimizer','Verifier']] = None, + final_modules: Optional[Union['ConfGenOptimizer','Verifier']] = None, save_dir: Optional[str] = None, ) -> 'ConformerGenerator': """ - Initiate the conformer generator object. The best practice is set all information here + Initiate the conformer generator object. The best practice is set all information here. + Args: - smiles (str): The SMILES of the species. - multiplicity (int, optional): The spin multiplicity of the species. The spin multiplicity will be interpreted from the smiles if this - is not given by the user. - optimizer (GaussianOptimizer, optional): The optimizer used to optimize geometries. - pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. - verifiers (XTBFrequencyVerifier, optional): The verifier used to verify the obtained conformer. - sampler (TorisonalSampler, optional): The sampler used to do automated conformer search for the obtained conformer. - final_modules (Optimizer, Verifier, optional): The final modules can include optimizer in different LoT than previous - one and verifier used to verify the obtained conformer. + smiles (str): SMILES input for which to generate conformers. + multiplicity (int, optional): The spin multiplicity of the species. Defaults to ``None``, + which will be interpreted from molecule generated by the `smiles`. + optimizer (ConfGenOptimizer, optional): Instance of a :obj:`ConfGenOptimizer `. + Available options are :obj:`XTBOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`MMFFOptimizer `. 
+ pruner (ConfGenPruner, optional): Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (Verifier, optional): Instance of a :obj:`Verifier `. + Available option is :obj:`XTBFrequencyVerifier `. + sampler (TorsionalSampler, optional): Instance of a :obj:`TorsionalSampler `. + final_modules (list): List of instances of optimizer/pruner to run after initial cycles complete. save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") self.smiles = smiles if multiplicity: self.multiplicity = multiplicity - else: + else: mol = RDKitMol.FromSmiles(smiles) mul = mol.GetSpinMultiplicity() self.multiplicity = mul @@ -203,12 +317,14 @@ def __init__(self, def embed_stable_species(self, smiles: str, n_conformers: int = 20, - ) -> 'rdmc.RDKitMol': + ) -> 'RDKitMol': """ Embed the well conformer according to the SMILES provided. + Args: smiles (str): The well conformer SMILES. - n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to 20. + n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to ``20``. + Returns: An RDKitMol of the well conformer with 3D geometry embedded. """ @@ -243,18 +359,18 @@ def embed_stable_species(self, return mol def set_filter(self, - mol: 'RDKitMol', + mol: RDKitMol, n_conformers: int, ) -> list: """ - Assign the indices of reactions to track wheter the conformers are passed to the following steps. + Assign the indices of conformers to track whether the conformers are passed to the following steps. Args: mol ('RDKitMol'): The stable species in RDKitMol object with 3D geometries embedded. n_conformers (int): The maximum number of conformers to be passed to the following steps. Returns: - An RDKitMol with KeepIDs having `True` values to be passed to the following steps. 
+ RDKitMol: with ``KeepIDs`` as a list of ``True`` and ``False`` indicating whether a conformer passes the check. """ energy_dict = mol.energy KeepIDs = mol.KeepIDs @@ -275,10 +391,10 @@ def __call__(self, Run the workflow of well conformer generation. Args: - n_conformers (int): The maximum number of conformers to be generated. Defaults to 20. - n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to 20. - n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to 1. - n_refines (int): The maximum number of conformers to be passed to the final modeuls. Defaults to 1. + n_conformers (int): The maximum number of conformers to be generated. Defaults to ``20``. + n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to ``20``. + n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to ``1``. + n_refines (int): The maximum number of conformers to be passed to the final modules. Defaults to ``1``. """ if self.save_dir: @@ -342,4 +458,4 @@ def __call__(self, with open(os.path.join(self.save_dir, "workflow_check_ids.pkl"), "wb") as f: pickle.dump(opt_mol.KeepIDs, f) - return opt_mol \ No newline at end of file + return opt_mol diff --git a/rdmc/conformer_generation/metrics.py b/rdmc/conformer_generation/metrics.py index 1e66d526..da51070f 100644 --- a/rdmc/conformer_generation/metrics.py +++ b/rdmc/conformer_generation/metrics.py @@ -6,7 +6,7 @@ """ import numpy as np -from typing import Optional +from typing import List, Optional R = 0.0019872 # kcal/(K*mol) @@ -14,23 +14,34 @@ class SCGMetric: """ A class to calculate and track the given metric ("entropy", "partition function", or "total conformers") for a molecule over time. + + Args: + metric (str, optional): Metric to be calculated. Options are ``"entropy"``, ``"partition function"``, or ``"total conformers"``. + Defaults to ``"entropy"``. 
+ window (int, optional): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). + Defaults to ``5``. + threshold (float, optional): Threshold for the change in metric to decide when to stop generating conformers. + Defaults to ``0.01``. + T (float, optional): Temperature for entropy or partition function calculations. Defaults to ``298`` K. """ - def __init__( - self, - metric: Optional[str] = "entropy", - window: Optional[int] = 5, - threshold: Optional[float] = 0.01, - T: Optional[float] = 298, - ): + def __init__(self, + metric: Optional[str] = "entropy", + window: Optional[int] = 5, + threshold: Optional[float] = 0.01, + T: Optional[float] = 298, + ): """ Generate an SCGMetric instance. Args: - metric (str): Metric to be calculated. - window (int): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). - threshold (float): Threshold for the change in metric to decide when to stop generating conformers. - T (float): Temperature for entropy or partition function calculations. + metric (str, optional): Metric to be calculated. Options are ``"entropy"``, ``"partition function"``, or ``"total conformers"``. + Defaults to ``"entropy"``. + window (int, optional): Window size to compute the change in metric (doesn't work when the metric is "total conformers"). + Defaults to ``5``. + threshold (float, optional): Threshold for the change in metric to decide when to stop generating conformers. + Defaults to ``0.01``. + T (float, optional): Temperature for entropy or partition function calculations. Defaults to ``298`` K. """ self.metric = metric self.window = window @@ -38,8 +49,14 @@ def __init__( self.T = T self.metric_history = [] - def calculate_metric(self, mol_data): + def calculate_metric(self, + mol_data: List[dict]): + """ + Calculate the metric for a given molecule. The calculated value will be appended to the ``metric_history`` list. 
+ Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. + """ if self.metric == "entropy": metric_val = self.calculate_entropy(mol_data) @@ -55,7 +72,12 @@ def calculate_metric(self, mol_data): self.metric_history.append(metric_val) def check_metric(self): + """ + Check if the change in metric is below the threshold. + Returns: + bool: ``True`` if the change in metric is below the threshold, ``False`` otherwise. + """ if self.metric == "total conformers": return False else: @@ -66,8 +88,14 @@ def check_metric(self): ) return True if change <= self.threshold else False - def calculate_entropy(self, mol_data): + def calculate_entropy(self, + mol_data: List[dict]): + """ + Calculate the entropy of a molecule. + Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. + """ energies = np.array([c["energy"] for c in mol_data]) energies = energies - energies.min() _prob = np.exp(-energies / (R * self.T)) @@ -75,8 +103,14 @@ def calculate_entropy(self, mol_data): entropy = -R * np.sum(prob * np.log(prob)) return entropy - def calculate_partition_function(self, mol_data): + def calculate_partition_function(self, + mol_data: List[dict]): + """ + Calculate the partition function of a molecule. + Args: + mol_data (List[dict]): A list of dictionaries with molecule conformers. 
+ """ energies = np.array([c["energy"] for c in mol_data]) energies = energies - energies.min() prob = np.exp(-energies / (R * self.T)) diff --git a/rdmc/conformer_generation/sampler.py b/rdmc/conformer_generation/sampler.py index 68b013ef..2aba1cc6 100644 --- a/rdmc/conformer_generation/sampler.py +++ b/rdmc/conformer_generation/sampler.py @@ -16,7 +16,6 @@ import numpy as np from scipy import constants from rdkit import Chem -import seaborn as sns import matplotlib.pyplot as plt from matplotlib.patches import Rectangle @@ -25,49 +24,83 @@ from rdmc.mathlib.greedymin import search_minimum from rdmc.ts import get_formed_and_broken_bonds -from xtb.libxtb import VERBOSITY_FULL, VERBOSITY_MINIMAL, VERBOSITY_MUTED -from xtb.utils import get_method, _methods -from xtb.interface import Calculator +try: + from xtb.libxtb import VERBOSITY_FULL, VERBOSITY_MINIMAL, VERBOSITY_MUTED + from xtb.utils import get_method, _methods + from xtb.interface import Calculator +except ImportError: + print("No xtb-python installation detected. Skipping import...") try: import scine_sparrow import scine_utilities as su except: - print("No scine_sparrow installation deteced. Skipping import...") + print("No scine_sparrow installation detected. Skipping import...") class TorsionalSampler: """ A class to find possible conformers by sampling the PES for each torsional pair. - You have to have the Spharrow and xtb-python packages installed to run this workflow. + You have to have the `Sparrow `_ and `xtb-python `_ packages installed to run this workflow. + + Args: + method (str, optional): The method to be used for automated conformer search. Only the methods available in Spharrow and xtb-python can be used. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + n_point_each_torsion (float, optional): Number of points to be sampled along each rotational mode. 
Defaults to ``45.``. + n_dimension (int, optional): Number of dimensions. Defaults to ``2``. If ``-1`` is assigned, the number of dimension would be the number of rotatable bonds. + optimizer (ConfGenOptimizer or TSOptimizer, optional): The optimizer used to optimize TS or stable specials geometries. Available options for + `TSOptimizer ` + are :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + and :obj:`GaussianOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier, Verifier, list of TSVerifiers or list of Verifiers, optional): The verifier or a list of verifiers used to verify the obtained conformer. Available + options are + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, and + :obj:`XTBFrequencyVerifier `. """ - def __init__( - self, - method: str = "GFN2-xTB", - nprocs: int = 1, - memory: int = 1, - n_point_each_torsion: int = 45, - n_dimension: int = 2, - optimizer: Optional[Union["XTBOptimizer", "TSOptimizer", "Optimizer"]] = None, - pruner: Optional["ConfGenPruner"] = None, - verifiers: Optional[Union["TSVerifier", "Verifier", List["TSVerifier"], List["Verifier"]]] = None, - ): + def __init__(self, + method: str = "GFN2-xTB", + nprocs: int = 1, + memory: int = 1, + n_point_each_torsion: int = 45, + n_dimension: int = 2, + optimizer: Optional[Union["ConfGenOptimizer","TSOptimizer"]] = None, + pruner: Optional["ConfGenPruner"] = None, + verifiers: Optional[Union["TSVerifier", + "Verifier", + List["TSVerifier"], + List["Verifier"]]] = None, + ): """ Initiate the TorsionalSampler class object. + Args: method (str, optional): The method to be used for automated conformer search. Only the methods available in Spharrow and xtb-python can be used. - Defaults to GFN2-xTB. - nprocs (int, optional): The number of processors to use. Defaults to 1. 
- memory (int, optional): Memory in GB used by Gaussian. Defaults to 1. - n_point_each_torsion (int): Number of points to be sampled along each rotational mode. Defaults to 45. - n_dimension (int): Number of dimensions. Defaults to 2. If `-1` is assigned, the n_dimension would be the number of rotatable bonds. - optimizer (XTBOptimizer, TSOptimizer or Optimizer, optional): The optimizer used to optimize TS or stable specials geometries. Available options for `TSOptimizer` - are `SellaOptimizer`, `OrcaOptimizer`, and `GaussianOptimizer`. + Defaults to ``"GFN2-xTB"``. + nprocs (int, optional): The number of processors to use. Defaults to ``1``. + memory (int, optional): Memory in GB used by Gaussian. Defaults to ``1``. + n_point_each_torsion (float, optional): Number of points to be sampled along each rotational mode. Defaults to ``45.``. + n_dimension (int, optional): Number of dimensions. Defaults to ``2``. If ``-1`` is assigned, the number of dimensions would be the number of rotatable bonds. + optimizer (ConfGenOptimizer or TSOptimizer, optional): The optimizer used to optimize TS or stable species geometries. Available options for + `TSOptimizer ` + are :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + and :obj:`GaussianOptimizer `. pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. verifiers (TSVerifier, Verifier, list of TSVerifiers or list of Verifiers, optional): The verifier or a list of verifiers used to verify the obtained conformer. Available - options are `GaussianIRCVerifier`, `OrcaIRCVerifier`, and `XTBFrequencyVerifier`. + options are + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, and + :obj:`XTBFrequencyVerifier `.
""" self.logger = logging.getLogger(f"{self.__class__.__name__}") self.method = method @@ -79,28 +112,28 @@ def __init__( self.pruner = pruner self.verifiers = [] if not verifiers else verifiers - def get_conformers_by_change_torsions( - self, - mol: RDKitMol, - id: int = 0, - torsions: List = None, - exclude_methyl: bool = True, - on_the_fly_check: bool = True, - ) -> List[RDKitMol]: + def get_conformers_by_change_torsions(self, + mol: RDKitMol, + id: int = 0, + torsions: Optional[list] = None, + exclude_methyl: bool = True, + on_the_fly_check: bool = True, + ) -> List[RDKitMol]: """ Generate conformers by rotating the angles of the torsions. A on-the-fly check can be applied, which identifies the conformers with colliding atoms. Args: mol (RDKitMol): A RDKitMol molecule object. - id (int): The ID of the conformer to be obtained. Defaults to 0. - torsions (list): A list of four-atom-index lists indicating the torsional modes. - exclude_methyl (bool): Whether exclude the torsions with methyl groups. Defaults to False. - If `torsions` is provided, this function won't work. - on_the_fly_filter (bool): Whether to check colliding atoms on the fly. Defaults to True. + id (int): The ID of the conformer to be obtained. Defaults to ``0``. + torsions (list): A list of four-atom-index lists indicating the torsional modes. Defaults to ``None``, + which means all the rotatable bonds will be used. + exclude_methyl (bool): Whether to exclude the torsions with methyl groups. Defaults to ``True``. + Only valid if ``torsions`` is not provided. + on_the_fly_check (bool): Whether to check colliding atoms on the fly. Defaults to ``True``. Returns: - A list of RDKitMol of sampled 3D geometries for each torsional mode. + list: A list of RDKitMol of sampled 3D geometries for each torsional mode.
""" conf = mol.Copy().GetConformer(id=id) origin_coords = mol.GetPositions(id=id) @@ -167,30 +200,31 @@ def get_conformers_by_change_torsions( return conformers_by_change_torsions - def __call__( - self, - mol: RDKitMol, - id: int, - rxn_smiles: Optional[str] = None, - torsions: Optional[List] = None, - no_sample_dangling_bonds: bool = True, - no_greedy: bool = False, - save_dir: Optional[str] = None, - save_plot: bool = True, - ): + def __call__(self, + mol: RDKitMol, + id: int, + rxn_smiles: Optional[str] = None, + torsions: Optional[List] = None, + no_sample_dangling_bonds: bool = True, + no_greedy: bool = False, + save_dir: Optional[str] = None, + save_plot: bool = True, + ): """ Run the workflow of conformer generation. Args: mol (RDKitMol): An RDKitMol object. id (int): The ID of the conformer to be obtained. - rxn_smiles (str, optional): The SMILES of the reaction. The SMILES should be formatted similar to `"reactant1.reactant2>>product1.product2."`. + rxn_smiles (str, optional): The SMILES of the reaction. The SMILES should be formatted similar to + `"reactant1.reactant2>>product1.product2."`. Defaults to ``None``, which means + ``torsions`` will be provided and used to generate conformers. torsions (list, optional): A list of four-atom-index lists indicating the torsional modes. - no_sample_dangling_bonds (bool): Whether to sample dangling bonds. Defaults to False. - no_greedy (bool): Whether to use greedy algorithm to find local minima. If `True`, all the sampled conformers - would be passed to the optimization and verification steps. Defaults to False. + no_sample_dangling_bonds (bool): Whether to skip sampling of dangling bonds. Defaults to ``True``. + no_greedy (bool): Whether to skip the greedy algorithm for finding local minima. If ``True``, all the sampled conformers + would be passed to the optimization and verification steps. Defaults to ``False``. save_dir (str or Pathlike object, optional): The path to save the outputs generated during the generation.
- save_plot (bool): Whether to save the heat plot for the PES of each torsinal mode. Defaults to True. + save_plot (bool): Whether to save the heat plot for the PES of each torsional mode. Defaults to ``True``. """ # Get bonds which will not be rotated during conformer searching sampler_mol = mol.Copy() @@ -395,27 +429,30 @@ def __call__( return mol -def get_separable_angle_list( - samplings: Union[List, Tuple], from_angles: Optional[Union[List, Tuple]] = None -) -> List[List]: +def get_separable_angle_list(samplings: Union[List, Tuple], + from_angles: Optional[Union[List, Tuple]] = None + ) -> List[List]: """ Get a angle list for each input dimension. For each dimension - The input can be a int, indicating the angles will be evenly sampled; - Or a list, indicate the angles to be sampled; - Examples: - [[120, 240,], 4, 0] => [[120, 240], - [0, 90, 180, 270], - [0]] - List of lists are returned for the sake of further calculation + The input can be a ``int`` indicating the angles will be evenly sampled; + or a ``list`` indicating the angles to be sampled; Args: samplings (Union[List, Tuple]): An array of sampling information. - For each element, it can be either list or int. + For each element, it can be either list or int. from_angles (Union[List, Tuple]): An array of initial angles. - If not set, angles will begin at zeros. + If not set, all angles will begin at zeros. Returns: list: A list of sampled angles sets. + + Examples: + + .. code-block:: python + + get_separable_angle_list([[120, 240,], 4, 0]) + >>> [[120, 240], [0, 90, 180, 270], [0]] + """ from_angles = from_angles or len(samplings) * [0.0] angle_list = [] @@ -444,15 +481,18 @@ def get_separable_angle_list( return angle_list -def get_energy(mol: RDKitMol, confId: int = 0, method: str = "GFN2-xTB") -> float: +def get_energy(mol: RDKitMol, + confId: int = 0, + method: str = "GFN2-xTB", + ) -> float: """ - Calculate the energy of the `RDKitMol` with given confId. The unit is in kcal/mol. 
- Only support methods already suported either in Spharrow or xtb-python. + Calculate the energy of the ``RDKitMol`` with given ``confId``. The unit is in kcal/mol. + Only support methods already supported either in sparrow or xtb-python. Args: mol (RDKitMol): A RDKitMol molecule object. - confId (int): The ID of the conformer for calculating energy. Defaults to 0. - method (str): Which semiempirical method to be used in running energy calcualtion. Defaults to "GFN2-xTB". + confId (int): The ID of the conformer for calculating energy. Defaults to ``0``. + method (str): Which semi-empirical method to be used in running energy calculation. Defaults to ``"GFN2-xTB"``. Returns: The energy of the conformer. @@ -495,7 +535,7 @@ def get_energy(mol: RDKitMol, confId: int = 0, method: str = "GFN2-xTB") -> floa return energy -def preprocess_energies(energies: np.ndarray): +def preprocess_energies(energies: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Rescale the energy based on the lowest energy. @@ -503,7 +543,7 @@ def preprocess_energies(energies: np.ndarray): energies (np.ndarray): A np.ndarray containing the energies for each sampled point. Returns: - The rescaled energies and the mask pointing out positions having values + tuple: The rescaled energies and the mask pointing out positions having values """ max_energy = np.nanmax(energies) min_energy = np.nanmin(energies) @@ -518,15 +558,26 @@ def preprocess_energies(energies: np.ndarray): return rescaled_energies, mask -def plot_heat_map( - energies: np.ndarray, - minimum_points: List[Tuple], - save_path: str, - mask: np.ndarray = None, - detailed_view: bool = False, - title: str = None, -): - """Plot and save the heat map of a given PES.""" +def plot_heat_map(energies: np.ndarray, + minimum_points: List[Tuple], + save_path: str, + mask: Optional[np.ndarray] = None, + detailed_view: bool = False, + title: Optional[str] = None, + ): + """ + Plot and save the heat map of a given PES. 
+ + Args: + energies (np.ndarray): A ``np.ndarray`` containing the energies for each sampled point. + minimum_points (List[Tuple]): A list of tuples containing the indices of the minimum points. + save_path (str): The path to save the plot. + mask (np.ndarray, optional): A ``np.ndarray`` containing the mask for the energies. + detailed_view (bool): Whether to plot the detailed view of the PES. Defaults to ``False``. + title (str, optional): The title of the plot. + """ + import seaborn as sns + if detailed_view: fig_size = (28, 20) annot = True # detailed view diff --git a/rdmc/conformer_generation/ts_generators.py b/rdmc/conformer_generation/ts_generators.py index d6dda4b7..394a83e2 100644 --- a/rdmc/conformer_generation/ts_generators.py +++ b/rdmc/conformer_generation/ts_generators.py @@ -2,15 +2,16 @@ #-*- coding: utf-8 -*- """ -Modules for ts conformer generation workflows +Modules for TS conformer generation workflows. """ -import os -import numpy as np import logging -import random +import os import pickle from typing import List, Optional, Union +import random + +import numpy as np from rdmc.conformer_generation.utils import * from rdmc.conformer_generation.generators import StochasticConformerGenerator @@ -21,12 +22,47 @@ class TSConformerGenerator: """ The class used to define a workflow for generating a set of TS conformers. + + Args: + rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to ``"reactant1.reactant2>>product1.product2."``. + multiplicity (int, optional): The spin multiplicity of the reaction. The spin multiplicity will be interpreted from the reaction smiles if this + is not given by the user. + use_smaller_multiplicity (bool, optional): Whether to use the smaller multiplicity when the interpreted multiplicity from the reaction smiles is + inconsistent between reactants and products. Defaults to ``True``. + embedder (TSInitialGuesser, optional): Instance of a :obj:`TSInitialGuesser `. 
Available options are + :obj:`TSEGNNGuesser `, + :obj:`TSGCNGuesser `, + :obj:`AutoNEBGuesser `, + :obj:`RMSDPPGuesser `, and + :obj:`DEGSMGuesser `. + optimizer (TSOptimizer, optional): Instance of a :obj:`TSOptimizer `. Available options are + :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`QChemOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. + Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. + Instance of a :obj:`TSVerifier `. + Available options are + :obj:`XTBFrequencyVerifier `, + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, + :obj:`QChemIRCVerifier `, and + :obj:`TSScreener `. + sampler (TorsionalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. You can use + :obj:`TorsionalSampler ` to define your own sampler. + final_modules (TSOptimizer, TSVerifier or list of TSVerifiers, optional): The final modules can include optimizer in different LoT than previous + one and verifier(s) used to verify the obtained TS conformer. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. Defaults to ``None``. """ def __init__(self, rxn_smiles: str, multiplicity: Optional[int] = None, - use_smaller_multiplicity: Optional[bool] = True, + use_smaller_multiplicity: bool = True, embedder: Optional['TSInitialGuesser'] = None, optimizer: Optional['TSOptimizer'] = None, pruner: Optional['ConfGenPruner'] = None, @@ -36,32 +72,48 @@ def __init__(self, save_dir: Optional[str] = None, ) -> 'TSConformerGenerator': """ - Initiate the TS conformer generator object.
The best practice is set all information here + The class used to define a workflow for generating a set of TS conformers. Args: - rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to `"reactant1.reactant2>>product1.product2."`. + rxn_smiles (str): The SMILES of the reaction. The SMILES should be formatted similar to ``"reactant1.reactant2>>product1.product2."``. multiplicity (int, optional): The spin multiplicity of the reaction. The spin multiplicity will be interpreted from the reaction smiles if this - is not given by the user. + is not given by the user. use_smaller_multiplicity (bool, optional): Whether to use the smaller multiplicity when the interpreted multiplicity from the reaction smiles is - inconsistent. - embedder (TSInitialGuesser, optional): The embedder used to generate TS initial guessers. Available options are `TSEGNNGuesser`, `TSGCNGuesser`. - `RMSDPPGuesser`, and `AutoNEBGuesser`. - optimizer (TSOptimizer, optional): The optimizer used to optimize TS geometries. Available options are `SellaOptimizer`, `OrcaOptimizer`, and - `GaussianOptimizer`. - pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. Available options are - `CRESTPruner` and `TorsionPruner`. - verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. Available - options are `GaussianIRCVerifier`, `OrcaIRCVerifier`, and `XTBFrequencyVerifier`. - sampler (TorisonalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. + inconsistent between reactants and products. Defaults to ``True``. + embedder (TSInitialGuesser, optional): Instance of a :obj:`TSInitialGuesser `. Available options are + :obj:`TSEGNNGuesser `, + :obj:`TSGCNGuesser `, + :obj:`AutoNEBGuesser `, + :obj:`RMSDPPGuesser `, and + :obj:`DEGSMGuesser `. 
+ optimizer (TSOptimizer, optional): Instance of a :obj:`TSOptimizer `. Available options are + :obj:`SellaOptimizer `, + :obj:`OrcaOptimizer `, + :obj:`GaussianOptimizer `, and + :obj:`QChemOptimizer `. + pruner (ConfGenPruner, optional): The pruner used to prune conformers based on geometric similarity after optimization. + Instance of a :obj:`ConfGenPruner `. Available options are + :obj:`CRESTPruner ` and + :obj:`TorsionPruner `. + verifiers (TSVerifier or list of TSVerifiers, optional): The verifier or a list of verifiers used to verify the obtained TS conformer. + Instance of a :obj:`TSVerifier `. + Available options are + :obj:`XTBFrequencyVerifier `, + :obj:`GaussianIRCVerifier `, + :obj:`OrcaIRCVerifier `, + :obj:`QChemIRCVerifier `, and + :obj:`TSScreener `. + sampler (TorsionalSampler, optional): The sampler used to do automated conformer search for the obtained TS conformer. You can use + :obj:`TorsionalSampler ` to define your own sampler. final_modules (TSOptimizer, TSVerifier or list of TSVerifiers, optional): The final modules can include optimizer in different LoT than previous one and verifier(s) used to verify the obtained TS conformer. - save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. + save_dir (str or Pathlike object, optional): The path to save the intermediate files and outputs generated during the generation. Defaults to ``None``. """ self.logger = logging.getLogger(f"{self.__class__.__name__}") self.rxn_smiles = rxn_smiles if multiplicity: - self.multiplicity = multiplicity - else: + self.multiplicity = multiplicity + else: r_smi, p_smi = rxn_smiles.split(">>") r_mol = RDKitMol.FromSmiles(r_smi) p_mol = RDKitMol.FromSmiles(p_smi) @@ -99,10 +151,10 @@ def embed_stable_species(self, Args: smiles (str): The reactant or product complex in SMILES. if multiple molecules involve, - use `.` to separate them. + use ``"."`` to separate them.
Returns: - An RDKitMol of the reactant or product complex with 3D geometry embedded. + RDKitMol: An RDKitMol of the reactant or product complex with 3D geometry embedded. """ # Split the complex smiles into a list of molecule smiles smiles_list = smiles.split(".") @@ -166,11 +218,11 @@ def generate_seed_mols(self, Args: rxn_smiles (str): The reaction smiles of the reaction. - n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to 20. - shuffle (Bool, optional): Whether or not to shuffle the embedded mols. + n_conformers (int, optional): The maximum number of conformers to be generated. Defaults to ``20``. + shuffle (bool, optional): Whether or not to shuffle the embedded mols. Defaults to ``False``. Returns: - list + list: A list of reactant/product pairs in ``RDKitMol``. """ # Convert SMILES to reactant and product complexes r_smi, p_smi = rxn_smiles.split(">>") @@ -234,16 +286,16 @@ def generate_seed_mols(self, def set_filter(self, ts_mol: 'RDKitMol', n_conformers: int, - ) -> list: + ) -> RDKitMol: """ - Assign the indices of reactions to track wheter the conformers are passed to the following steps. + Assign the indices of reactions to track whether the conformers are passed to the following steps. Args: ts_mol ('RDKitMol'): The TS in RDKitMol object with 3D geometries embedded. n_conformers (int): The maximum number of conformers to be passed to the following steps. Returns: - An RDKitMol with KeepIDs having `True` values to be passed to the following steps. + RDKitMol: with ``KeepIDs`` as a list of ``True`` and ``False`` indicating whether a conformer passes the check. """ energy_dict = ts_mol.energy KeepIDs = ts_mol.KeepIDs @@ -259,15 +311,19 @@ def __call__(self, n_conformers: int = 20, n_verifies: int = 20, n_sampling: int = 1, - n_refines: int = 1): + n_refines: int = 1, + ) -> 'RDKitMol': """ Run the workflow of TS conformer generation. Args: - n_conformers (int): The maximum number of conformers to be generated. 
Defaults to 20. - n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to 20. - n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to 1. - n_refines (int): The maximum number of conformers to be passed to the final modeuls. Defaults to 1. + n_conformers (int): The maximum number of conformers to be generated. Defaults to ``20``. + n_verifies (int): The maximum number of conformers to be passed to the verifiers. Defaults to ``20``. + n_sampling (int): The maximum number of conformers to be passed to the torsional sampling. Defaults to ``1``. + n_refines (int): The maximum number of conformers to be passed to the final modules. Defaults to ``1``. + + Returns: + RDKitMol: The TS in RDKitMol object with 3D geometries embedded. """ if self.save_dir: