From a54459af3109fdb9e82c86c1ec59429cbc4c1561 Mon Sep 17 00:00:00 2001 From: Mark Goldman Date: Fri, 2 Sep 2016 11:43:48 -0400 Subject: [PATCH] allow for csv input to createKineticsLibrary.py created example csv 'test_data.csv' for inputting data. Removed non-csv input functionality and added only csv functionality. requires Pandas as dependency which isn't currently needed for any core in RMG-Py --- scripts/createKineticsLibrary.py | 122 +++++++++++++++---------------- scripts/test_data.csv | 5 ++ 2 files changed, 65 insertions(+), 62 deletions(-) create mode 100644 scripts/test_data.csv diff --git a/scripts/createKineticsLibrary.py b/scripts/createKineticsLibrary.py index fd661f6f30..be4b8f8144 100644 --- a/scripts/createKineticsLibrary.py +++ b/scripts/createKineticsLibrary.py @@ -9,6 +9,9 @@ from rmgpy.data.base import Entry from rmgpy.reaction import Reaction import re +import sys +import pandas as pd +import numpy as np """ The purpose of this script is to create a reaction library from the least amount of input. 
@@ -30,73 +33,47 @@ This library is a test created to showcase the createKineticsLibrary.py script """ -#smiles for reactants and products -#can use empty string of None for unimolecular reactions -react1smiles = ["CC=CC", "C=C", 'CCCCC[CH2]'] -react2smiles = ['[CH3]', '[CH3]', ''] -product1smiles = ["[CH2]C=CC", "[CH]=C", "C[CH]CCCC"] -product2smiles = ['C', 'C', None] +#read data from csv +try: + csv_file_name = sys.argv[1] +except: + raise Exception('must input the path to a csv file for importing kinetics') +data = pd.read_csv(csv_file_name, + dtype={'year':str,'pages':str,'volume':str,'journal':str,'article title':str,'Author list':str,'short description':str,'long description':str,'reactant 1 SMILES':str,'reactant 2 SMILES':str, 'product 1 SMILES':str, 'product 2 SMILES':str}, + skipinitialspace=True, + na_values=['None','none','NA','Na'], + names=[u'reactant 1 SMILES', u'reactant 2 SMILES', + u'product 1 SMILES', u'product 2 SMILES', + u'A', u'n', u'Ea', + u'short description', u'long description', + u'Author list', u'article title', u'journal', + u'volume', u'pages', u'year'], + header=0) -#kinetics parameters AunitsBimolecular = 'cm^3/(mol*s)' AunitsUnimolecular = 's^-1' EaUnits = 'kcal/mol' -A = [18.06, 26.3, 18.4] -n = [3.27, 3.24, 3.27] -Ea = [6.85, 7.03, 7.15] - -#write comments -shortDesc0 = u"""CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d)""" -longDesc0 = u""" -Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at -B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate -coefficient computed TST with Eckart Tunnelling" -""" -reference0 = Article( - authors=["K. Wang", "S. Villano", "A. Dean"], - title=u'Reactions of allylic radicals that impact molecular weight growth kinetics', - journal="Phys. Chem. Chem. 
Phys.", - volume="17", - pages="""6255-6273""", - year="2015", -) -# flexible lists so that different entries can have different comments -# if you want no shortDesc or longDesc, you should use empty strings for Desc -shortDescList= [shortDesc0, '', ''] -longDescList = [longDesc0, '', ''] -# if you want no reference you should use None -referenceList = [reference0, None, None] + #done with inputs ################################################################################################################# -#check list lengths (useful check for some assurance you've entered things correctly) -length = len(react1smiles) -assert len(react2smiles) == length, "react2smiles has a different length than other lists" -assert len(product1smiles) == length, "product1smiles has a different length than other lists" -assert len(product2smiles) == length, "product2smiles has a different length than other lists" -assert len(A) == length, "A has a different length than other lists" -assert len(n) == length, "n has a different length than other lists" -assert len(Ea) == length, "Ea has a different length than other lists" -assert len(shortDescList) == length, "shortDescList has a different length than other lists" -assert len(longDescList) == length, "longDescList has a different length than other lists" -assert len(referenceList) == length, "referenceList has a different length than other lists" - #create entries in the library speciesDict={} -for index, r1smiles in enumerate(react1smiles): +for index in data.index: + series = data.iloc[index] + bimolecular = False #example item - r2smiles = react2smiles[index] - p1smiles = product1smiles[index] - p2smiles = product2smiles[index] #make species - r1 = Species().fromSMILES(r1smiles) - p1 = Species().fromSMILES(p1smiles) + r1 = Species().fromSMILES(series['reactant 1 SMILES']) + p1 = Species().fromSMILES(series['product 1 SMILES']) r2 = None p2 = None - if r2smiles: r2 = Species().fromSMILES(r2smiles) - if p2smiles: p2 = 
Species().fromSMILES(p2smiles) + if pd.notnull(series['reactant 2 SMILES']): + r2 = Species().fromSMILES(series['reactant 2 SMILES']) + if pd.notnull(series['product 2 SMILES']): + p2 = Species().fromSMILES(series['product 2 SMILES']) #make species labels changeDict={} #necessary in case any species already exists in the speciesDict (isomorphic not same instance) @@ -138,32 +115,53 @@ newReaction = Reaction(reactants = [r1], products = [p1]) #set Arrhenius + newArrhenius = Arrhenius() if bimolecular: - Ainstance = ScalarQuantity(A[index], AunitsBimolecular) + Ainstance = ScalarQuantity(series['A'], AunitsBimolecular) else: - Ainstance = ScalarQuantity(A[index], AunitsUnimolecular) + Ainstance = ScalarQuantity(series['A'], AunitsUnimolecular) newArrhenius = Arrhenius(A = Ainstance, - n = ScalarQuantity(n[index], ''), - Ea = ScalarQuantity(Ea[index], EaUnits)) + n = ScalarQuantity(series['n'], ''), + Ea = ScalarQuantity(series['Ea'], EaUnits)) - #create library entry + #create reference item for library entry referenceType = None - if referenceList[index]: referenceType = re.sub('.*\.', '', str(referenceList[index].__class__)) + reference = None + if pd.notnull(series['Author list']): + #separate authors + author_list = series['Author list'].split(';') + #remove white space + author_list = [re.sub('(\A\s|\s\Z)*','', author) for author in author_list] + reference = Article( + authors=author_list, + title=series['article title'], + journal=series['journal'], + volume=series['volume'], + pages=series['pages'], + year=series['year'],) + referenceType = re.sub('.*\.', '', str(reference.__class__)) + + #convert descriptions from nan to '' + if pd.notnull(series['short description']): shortDesc = series['short description'] + else: shortDesc = '' + if pd.notnull(series['long description']): longDesc = series['long description'] + else: longDesc = '' + #create library entry newEntry = Entry(index= index, label=str(newReaction), item=newReaction, parent=None, children=None, 
data=newArrhenius, - reference=referenceList[index], + reference=reference, referenceType= referenceType, - shortDesc=shortDescList[index], - longDesc=longDescList[index], + shortDesc=shortDesc, + longDesc=longDesc, rank=None,) #add to entry to library library.entries[index] = newEntry library.save(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/reactions.py'.format(libraryName))) -library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName))) \ No newline at end of file +library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName))) diff --git a/scripts/test_data.csv b/scripts/test_data.csv new file mode 100644 index 0000000000..5e368a0afd --- /dev/null +++ b/scripts/test_data.csv @@ -0,0 +1,5 @@ +reactant 1 SMILES, reactant 2 SMILES (optional), product 1 SMILES, product 2 SMILES (optional), A (cm3/mol-s or 1/s), n, Ea (kcal/mol), short description, long description, Author list (semicolon separated), article title, journal, volume, pages, year +CC=CC,[CH3],[CH2]C=CC,C,18.06,3.27,6.85,CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d),Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at+B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate coefficient computed TST with Eckart Tunnelling, K. Wang ; S. Villano;A. Dean ,Reactions of allylic radicals that impact molecular weight growth kinetics,Phys. Chem. Chem. Phys.,17,6255-6273,2015 +C=C,[CH3],[CH]=C,C,26.3,3.24,7.03,,,None,,,,, +CCCCC[CH2],,C[CH]CCCC,None,18.4,3.27,7.15,,,,,,,, +