From 925fdba735c9715e5d3d9f93ffddaf2db2b4d23f Mon Sep 17 00:00:00 2001
From: Nathan Yee <nyee@mit.edu>
Date: Wed, 31 Aug 2016 14:43:45 -0400
Subject: [PATCH 1/3] Create script createKineticsLibrary to make a kinetics
 library from hardcoded inputs

---
 scripts/createKineticsLibrary.py | 169 +++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 scripts/createKineticsLibrary.py

diff --git a/scripts/createKineticsLibrary.py b/scripts/createKineticsLibrary.py
new file mode 100644
index 0000000000..fd661f6f30
--- /dev/null
+++ b/scripts/createKineticsLibrary.py
@@ -0,0 +1,169 @@
+from rmgpy.rmg.model import Species
+from rmgpy import settings
+from rmgpy.data.kinetics.library import KineticsLibrary
+
+from rmgpy.data.reference import *
+import os.path
+from rmgpy.kinetics.arrhenius import Arrhenius
+from rmgpy.quantity import ScalarQuantity
+from rmgpy.data.base import Entry
+from rmgpy.reaction import Reaction
+import re
+"""
+The purpose of this script is to create a reaction library from the least amount of input.
+
+Might eventually make it so it reads in a csv instead of hard-coding inputs below.
+
+Currently the numbers below are all made up, so they should not be used by any means.
+
+For the inputs, every variable that is a list has elements where the indexes correspond with entry in another list
+For example, one complete data entry for Arrhenius parameters would be A[i], n[i], and Ea[i]
+"""
+
+#initalize library variables
+libraryName = 'test'
+library = KineticsLibrary()
+library.name = libraryName
+library.label =libraryName
+library.shortDesc = "Library with made up rates"
+library.longDesc = """
+This library is a test created to showcase the createKineticsLibrary.py script
+"""
+
+#smiles for reactants and products
+#can use empty string of None for unimolecular reactions
+react1smiles = ["CC=CC", "C=C", 'CCCCC[CH2]']
+react2smiles = ['[CH3]', '[CH3]', '']
+product1smiles = ["[CH2]C=CC", "[CH]=C", "C[CH]CCCC"]
+product2smiles = ['C', 'C', None]
+
+#kinetics parameters
+AunitsBimolecular = 'cm^3/(mol*s)'
+AunitsUnimolecular = 's^-1'
+EaUnits = 'kcal/mol'
+A = [18.06, 26.3, 18.4]
+n = [3.27, 3.24, 3.27]
+Ea = [6.85, 7.03, 7.15]
+
+#write comments
+shortDesc0 = u"""CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d)"""
+longDesc0 = u"""
+Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at
+B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate
+coefficient computed TST with Eckart Tunnelling"
+"""
+reference0 = Article(
+    authors=["K. Wang", "S. Villano", "A. Dean"],
+    title=u'Reactions of allylic radicals that impact molecular weight growth kinetics',
+    journal="Phys. Chem. Chem. Phys.",
+    volume="17",
+    pages="""6255-6273""",
+    year="2015",
+)
+# flexible lists so that different entries can have different comments
+# if you want no shortDesc or longDesc, you should use empty strings for Desc
+shortDescList= [shortDesc0, '', '']
+longDescList = [longDesc0, '', '']
+# if you want no reference you should use None
+referenceList = [reference0, None, None]
+
+#done with inputs
+#################################################################################################################
+#check list lengths (useful check for some assurance you've entered things correctly)
+length = len(react1smiles)
+assert len(react2smiles) == length, "react2smiles has a different length than other lists"
+assert len(product1smiles) == length, "product1smiles has a different length than other lists"
+assert len(product2smiles) == length, "product2smiles has a different length than other lists"
+assert len(A) == length, "A has a different length than other lists"
+assert len(n) == length, "n has a different length than other lists"
+assert len(Ea) == length, "Ea has a different length than other lists"
+assert len(shortDescList) == length, "shortDescList has a different length than other lists"
+assert len(longDescList) == length, "longDescList has a different length than other lists"
+assert len(referenceList) == length, "referenceList has a different length than other lists"
+
+#create entries in the library
+speciesDict={}
+for index, r1smiles in enumerate(react1smiles):
+    bimolecular = False
+    #example item
+    r2smiles = react2smiles[index]
+    p1smiles = product1smiles[index]
+    p2smiles = product2smiles[index]
+
+    #make species
+    r1 = Species().fromSMILES(r1smiles)
+    p1 = Species().fromSMILES(p1smiles)
+    r2 = None
+    p2 = None
+    if r2smiles: r2 = Species().fromSMILES(r2smiles)
+    if p2smiles: p2 = Species().fromSMILES(p2smiles)
+
+    #make species labels
+    changeDict={} #necessary in case any species already exists in the speciesDict (isomorphic not same instance)
+    for spec in [r1, r2, p1, p2]:
+        if spec:
+            for speciesLabel, existingSpecies in speciesDict.iteritems():
+                if spec.isIsomorphic(existingSpecies):
+                    changeDict[spec] = existingSpecies #species already in speciesDict
+                    break
+            else: #species is new
+                changeDict[spec] = spec
+                spec_formula = spec.molecule[0].getFormula()
+                if spec_formula not in speciesDict:
+                    spec.label = spec_formula
+                else:
+                    duplicateNumber = 2
+                    while (spec_formula + '-{}'.format(duplicateNumber)) in speciesDict:
+                        duplicateNumber += 1
+                    spec.label = spec_formula + '-{}'.format(duplicateNumber)
+                speciesDict[spec.label] = spec
+
+    #Remake species accounting for duplicates
+    r1 = changeDict[r1]
+    p1 = changeDict[p1]
+    if r2: r2 = changeDict[r2]
+    if p2: p2 = changeDict[p2]
+
+    #create reaction
+    newReaction = None
+    if r2:
+        bimolecular = True
+        if p2:
+            newReaction = Reaction(reactants = [r1, r2], products = [p1, p2])
+        else:
+            newReaction = Reaction(reactants = [r1, r2], products = [p1])
+    elif p2:
+        newReaction = Reaction(reactants = [r1], products = [p1, p2])
+    else:
+        newReaction = Reaction(reactants = [r1], products = [p1])
+
+    #set Arrhenius
+    newArrhenius = Arrhenius()
+    if bimolecular:
+        Ainstance = ScalarQuantity(A[index], AunitsBimolecular)
+    else:
+        Ainstance = ScalarQuantity(A[index], AunitsUnimolecular)
+    newArrhenius = Arrhenius(A = Ainstance,
+                             n = ScalarQuantity(n[index], ''),
+                             Ea = ScalarQuantity(Ea[index], EaUnits))
+
+    #create library entry
+    referenceType = None
+    if referenceList[index]: referenceType = re.sub('.*\.', '', str(referenceList[index].__class__))
+    newEntry = Entry(index= index,
+                     label=str(newReaction),
+                     item=newReaction,
+                     parent=None,
+                     children=None,
+                     data=newArrhenius,
+                     reference=referenceList[index],
+                     referenceType= referenceType,
+                     shortDesc=shortDescList[index],
+                     longDesc=longDescList[index],
+                     rank=None,)
+
+    #add to entry to library
+    library.entries[index] = newEntry
+
+library.save(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/reactions.py'.format(libraryName)))
+library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))
\ No newline at end of file

From 0381ce9e5453cd688fd69a1f9700c8fa3ccd9311 Mon Sep 17 00:00:00 2001
From: Nathan Yee <nyee@mit.edu>
Date: Wed, 31 Aug 2016 16:34:57 -0400
Subject: [PATCH 2/3] Modify ipython notebook script to be compatible with
 createKineticsLibrary

Specifically changed the way it reads comments, so that it can copy shortDesc and longDesc from the library entry. This is needed in the case where we make the library as an intermediate to making training reactions.
---
 ...rtKineticsLibraryToTrainingReactions.ipynb | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/scripts/convertKineticsLibraryToTrainingReactions.ipynb b/scripts/convertKineticsLibraryToTrainingReactions.ipynb
index 5e1dfffbf7..2250dce58e 100644
--- a/scripts/convertKineticsLibraryToTrainingReactions.ipynb
+++ b/scripts/convertKineticsLibraryToTrainingReactions.ipynb
@@ -91,6 +91,7 @@
    "outputs": [],
    "source": [
     "reactionDict = {}\n",
+    "commentsDict = {} #key is a :class:Reaction object, value is a list of entry attributes [reference, referenceType, shortDesc, longDesc]\n",
     "kineticLibrary = database.kinetics.libraries[libraryName]\n",
     "\n",
     "# html table settings\n",
@@ -102,6 +103,7 @@
     "    lib_rxn.kinetics = entry.data \n",
     "    lib_rxn.index = entry.index\n",
     "    lib_rxn.kinetics.comment = entry.label # Assign the entry's label to the comment\n",
+    "    commentsDict[lib_rxn] = [entry.reference, entry.referenceType, entry.shortDesc, entry.longDesc] #save comments to add to training\n",
     "    # Let's make RMG try to generate this reaction from the families!\n",
     "    fam_rxn_list = []\n",
     "    rxt_mol_mutation_num = 1\n",
@@ -343,7 +345,18 @@
     "    reactions = reactionDict[familyName]\n",
     "    print 'reactions.py previously has {} rxns. Now adding {} new rxn(s).'.format(len(trainingDatabase.entries.values()), len(reactions))\n",
     "    print '================='\n",
-    "    kineticFamily.saveTrainingReactions(reactions, shortDesc='Training reaction from kinetics library: {0}'.format(libraryName))"
+    "    for reaction in reactions:\n",
+    "        shortDesc = ''\n",
+    "        #If the library entry has a shortDesc and longDesc to the training reaction \n",
+    "        if commentsDict[reaction][2]:\n",
+    "            shortDesc = commentsDict[reaction][2]\n",
+    "            reaction.kinetics.comment = str(commentsDict[reaction][3])\n",
+    "        else:\n",
+    "            shortDesc = 'Training reaction from kinetics library: {0}'.format(libraryName)\n",
+    "        kineticFamily.saveTrainingReactions([reaction],\n",
+    "                                            reference = commentsDict[reaction][0],\n",
+    "                                            referenceType = commentsDict[reaction][1],\n",
+    "                                            shortDesc=shortDesc)"
    ]
   },
   {
@@ -530,7 +543,7 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 2",
    "language": "python",
    "name": "python2"
   },
@@ -544,7 +557,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "version": "2.7.13"
   }
  },
  "nbformat": 4,

From 23dfe63f077403874fc77496929af5f45790be39 Mon Sep 17 00:00:00 2001
From: Nathan Yee <nyee@mit.edu>
Date: Thu, 1 Jun 2017 15:11:23 -0400
Subject: [PATCH 3/3] Fix bug that didn't  properly check all resonances
 structures when creating training reactions

---
 ...rtKineticsLibraryToTrainingReactions.ipynb | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/scripts/convertKineticsLibraryToTrainingReactions.ipynb b/scripts/convertKineticsLibraryToTrainingReactions.ipynb
index 2250dce58e..dc4656b60f 100644
--- a/scripts/convertKineticsLibraryToTrainingReactions.ipynb
+++ b/scripts/convertKineticsLibraryToTrainingReactions.ipynb
@@ -36,6 +36,7 @@
     "from rmgpy.rmg.model import Species\n",
     "from rmgpy import settings\n",
     "from IPython.display import display, HTML, Image\n",
+    "import itertools\n",
     "from base64 import b64encode\n",
     "if compareKinetics:\n",
     "    import numpy as np\n",
@@ -106,21 +107,21 @@
     "    commentsDict[lib_rxn] = [entry.reference, entry.referenceType, entry.shortDesc, entry.longDesc] #save comments to add to training\n",
     "    # Let's make RMG try to generate this reaction from the families!\n",
     "    fam_rxn_list = []\n",
-    "    rxt_mol_mutation_num = 1\n",
-    "    pdt_mol_mutation_num = 1\n",
-    "    for reactant in lib_rxn.reactants:\n",
-    "        rxt_mol_mutation_num *= len(reactant.molecule)\n",
-    "\n",
-    "    for product in lib_rxn.products:\n",
-    "        pdt_mol_mutation_num *= len(product.molecule)\n",
-    "\n",
-    "    for mutation_i in range(rxt_mol_mutation_num):\n",
-    "        rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n",
-    "        pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n",
+    "    \n",
+    "    #get list of all possible resonance structures of reactant molecules\n",
+    "    if len(lib_rxn.reactants) > 1:\n",
+    "        rxt_resonances = [rxt.molecule for rxt in lib_rxn.reactants]\n",
+    "        rxts_cross = list(itertools.product(*rxt_resonances))\n",
+    "        rxts_cross = [list(cross) for cross in rxts_cross]\n",
+    "    else:\n",
+    "        rxts_cross = [[mol] for mol in lib_rxn.reactants[0].mol] #should have only one reactant\n",
+    "    \n",
+    "    #use families to try find reactions\n",
+    "    for rxts_mol in rxts_cross:\n",
+    "        pdts_mol = [spc.molecule[0] for spc in lib_rxn.products] #do we also need to cross through products.mol\n",
     "        fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n",
     "                        reactants=rxts_mol, products=pdts_mol))\n",
     "\n",
-    "\n",
     "    if len(fam_rxn_list) == 1:\n",
     "        fam_rxn = fam_rxn_list[0]\n",
     "\n",