Allow for CSV input to createKineticsLibrary.py

Created an example CSV file, 'test_data.csv', for inputting data. Removed the non-CSV input functionality, so the script now accepts only CSV input. This requires pandas as a dependency, which is not currently needed by any core part of RMG-Py.
ReactionMechanismGenerator · Sep 2, 2016 · a54459a · a54459a
1 parent 63b2b0e
commit a54459a
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 62 deletions.
diff --git a/scripts/createKineticsLibrary.py b/scripts/createKineticsLibrary.py
@@ -9,6 +9,9 @@
 from rmgpy.data.base import Entry
 from rmgpy.reaction import Reaction
 import re
+import sys
+import pandas as pd
+import numpy as np
 """
 The purpose of this script is to create a reaction library from the least amount of input.
 
@@ -30,73 +33,47 @@
 This library is a test created to showcase the createKineticsLibrary.py script
 """
 
-#smiles for reactants and products
-#can use empty string of None for unimolecular reactions
-react1smiles = ["CC=CC", "C=C", 'CCCCC[CH2]']
-react2smiles = ['[CH3]', '[CH3]', '']
-product1smiles = ["[CH2]C=CC", "[CH]=C", "C[CH]CCCC"]
-product2smiles = ['C', 'C', None]
+#read data from csv
+try:
+    csv_file_name = sys.argv[1]
+except:
+    raise Exception('must input the path to a csv file for importing kinetics')
+data = pd.read_csv(csv_file_name,
+    dtype={'year':str,'pages':str,'volume':str,'journal':str,'article title':str,'Author list':str,'short description':str,'long description':str,'reactant 1 SMILES':str,'reactant 2 SMILES':str, 'product 1 SMILES':str, 'product 2 SMILES':str}, 
+    skipinitialspace=True,
+    na_values=['None','none','NA','Na'],
+    names=[u'reactant 1 SMILES', u'reactant 2 SMILES',
+       u'product 1 SMILES', u'product 2 SMILES',
+       u'A', u'n', u'Ea',
+       u'short description', u'long description',
+       u'Author list', u'article title', u'journal',
+       u'volume', u'pages', u'year'],
+    header=0)
 
-#kinetics parameters
 AunitsBimolecular = 'cm^3/(mol*s)'
 AunitsUnimolecular = 's^-1'
 EaUnits = 'kcal/mol'
-A = [18.06, 26.3, 18.4]
-n = [3.27, 3.24, 3.27]
-Ea = [6.85, 7.03, 7.15]
-
-#write comments
-shortDesc0 = u"""CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d)"""
-longDesc0 = u"""
-Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at
-B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate
-coefficient computed TST with Eckart Tunnelling"
-"""
-reference0 = Article(
-    authors=["K. Wang", "S. Villano", "A. Dean"],
-    title=u'Reactions of allylic radicals that impact molecular weight growth kinetics',
-    journal="Phys. Chem. Chem. Phys.",
-    volume="17",
-    pages="""6255-6273""",
-    year="2015",
-)
-# flexible lists so that different entries can have different comments
-# if you want no shortDesc or longDesc, you should use empty strings for Desc
-shortDescList= [shortDesc0, '', '']
-longDescList = [longDesc0, '', '']
-# if you want no reference you should use None
-referenceList = [reference0, None, None]
+
 
 #done with inputs
 #################################################################################################################
-#check list lengths (useful check for some assurance you've entered things correctly)
-length = len(react1smiles)
-assert len(react2smiles) == length, "react2smiles has a different length than other lists"
-assert len(product1smiles) == length, "product1smiles has a different length than other lists"
-assert len(product2smiles) == length, "product2smiles has a different length than other lists"
-assert len(A) == length, "A has a different length than other lists"
-assert len(n) == length, "n has a different length than other lists"
-assert len(Ea) == length, "Ea has a different length than other lists"
-assert len(shortDescList) == length, "shortDescList has a different length than other lists"
-assert len(longDescList) == length, "longDescList has a different length than other lists"
-assert len(referenceList) == length, "referenceList has a different length than other lists"
-
 #create entries in the library
 speciesDict={}
-for index, r1smiles in enumerate(react1smiles):
+for index in data.index:
+    series = data.iloc[index]
+
     bimolecular = False
     #example item
-    r2smiles = react2smiles[index]
-    p1smiles = product1smiles[index]
-    p2smiles = product2smiles[index]
 
     #make species
-    r1 = Species().fromSMILES(r1smiles)
-    p1 = Species().fromSMILES(p1smiles)
+    r1 = Species().fromSMILES(series['reactant 1 SMILES'])
+    p1 = Species().fromSMILES(series['product 1 SMILES'])
     r2 = None
     p2 = None
-    if r2smiles: r2 = Species().fromSMILES(r2smiles)
-    if p2smiles: p2 = Species().fromSMILES(p2smiles)
+    if pd.notnull(series['reactant 2 SMILES']): 
+        r2 = Species().fromSMILES(series['reactant 2 SMILES'])
+    if pd.notnull(series['product 2 SMILES']): 
+        p2 = Species().fromSMILES(series['product 2 SMILES'])
 
     #make species labels
     changeDict={} #necessary in case any species already exists in the speciesDict (isomorphic not same instance)
@@ -138,32 +115,53 @@
         newReaction = Reaction(reactants = [r1], products = [p1])
 
     #set Arrhenius
+
     newArrhenius = Arrhenius()
     if bimolecular:
-        Ainstance = ScalarQuantity(A[index], AunitsBimolecular)
+        Ainstance = ScalarQuantity(series['A'], AunitsBimolecular)
     else:
-        Ainstance = ScalarQuantity(A[index], AunitsUnimolecular)
+        Ainstance = ScalarQuantity(series['A'], AunitsUnimolecular)
     newArrhenius = Arrhenius(A = Ainstance,
-                             n = ScalarQuantity(n[index], ''),
-                             Ea = ScalarQuantity(Ea[index], EaUnits))
+                             n = ScalarQuantity(series['n'], ''),
+                             Ea = ScalarQuantity(series['Ea'], EaUnits))
 
-    #create library entry
+    #create reference item for library entry
     referenceType = None
-    if referenceList[index]: referenceType = re.sub('.*\.', '', str(referenceList[index].__class__))
+    reference = None
+    if pd.notnull(series['Author list']):
+        #separate authors
+        author_list = series['Author list'].split(';')
+        #remove white space
+        author_list = [re.sub('(\A\s|\s\Z)*','', author) for author in author_list]
+        reference = Article(
+            authors=author_list,
+            title=series['article title'],
+            journal=series['journal'],
+            volume=series['volume'],
+            pages=series['pages'],
+            year=series['year'],)
+        referenceType = re.sub('.*\.', '', str(reference.__class__))
+
+    #convert descriptions from nan to ''
+    if pd.notnull(series['short description']): shortDesc = series['short description']
+    else: shortDesc = ''
+    if pd.notnull(series['long description']): longDesc = series['long description']
+    else: longDesc = ''
+    #create library entry
     newEntry = Entry(index= index,
                      label=str(newReaction),
                      item=newReaction,
                      parent=None,
                      children=None,
                      data=newArrhenius,
-                     reference=referenceList[index],
+                     reference=reference,
                      referenceType= referenceType,
-                     shortDesc=shortDescList[index],
-                     longDesc=longDescList[index],
+                     shortDesc=shortDesc,
+                     longDesc=longDesc,
                      rank=None,)
 
     #add to entry to library
     library.entries[index] = newEntry
 
 library.save(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/reactions.py'.format(libraryName)))
-library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))
+library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))
diff --git a/scripts/test_data.csv b/scripts/test_data.csv
@@ -0,0 +1,5 @@
+reactant 1 SMILES, reactant 2 SMILES (optional), product 1 SMILES, product 2 SMILES (optional), A (cm3/mol-s or 1/s), n, Ea (kcal/mol), short description, long description, Author list (semicolon separated), article title, journal, volume, pages, year
+CC=CC,[CH3],[CH2]C=CC,C,18.06,3.27,6.85,CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d),Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at+B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate coefficient computed TST with Eckart Tunnelling, K. Wang ; S. Villano;A. Dean ,Reactions of allylic radicals that impact molecular weight growth kinetics,Phys. Chem. Chem. Phys.,17,6255-6273,2015
+C=C,[CH3],[CH]=C,C,26.3,3.24,3.27,,,None,,,,,
+CCCCC[CH2],,C[CH]CCCC,None,18.4,7.03,7.15,,,,,,,,
+