Skip to content

Commit

Permalink
allow for csv input to createKineticsLibrary.py
Browse files Browse the repository at this point in the history
Created example csv 'test_data.csv' for inputting data. Removed non-csv input
functionality and added csv-only functionality. Requires pandas as a dependency,
which isn't currently needed by any core code in RMG-Py.
  • Loading branch information
goldmanm committed Sep 2, 2016
1 parent 63b2b0e commit a54459a
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 62 deletions.
122 changes: 60 additions & 62 deletions scripts/createKineticsLibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from rmgpy.data.base import Entry
from rmgpy.reaction import Reaction
import re
import sys
import pandas as pd
import numpy as np
"""
The purpose of this script is to create a reaction library from the least amount of input.
Expand All @@ -30,73 +33,47 @@
This library is a test created to showcase the createKineticsLibrary.py script
"""

#smiles for reactants and products
#can use empty string or None for unimolecular reactions
react1smiles = ["CC=CC", "C=C", 'CCCCC[CH2]']
react2smiles = ['[CH3]', '[CH3]', '']
product1smiles = ["[CH2]C=CC", "[CH]=C", "C[CH]CCCC"]
product2smiles = ['C', 'C', None]
#read data from csv
# The path to the csv file is taken from the first command-line argument.
try:
    csv_file_name = sys.argv[1]
except IndexError:
    # Only a missing argument should produce this message; a bare `except`
    # would also swallow KeyboardInterrupt/SystemExit.
    raise Exception('must input the path to a csv file for importing kinetics')

# `header=0` treats the first row of the file as a header and `names` replaces
# those labels with the short ones below, so the columns in the csv must appear
# in exactly this order.
# All text columns (including volume/pages/year) are forced to `str` so they
# are not converted to numbers; 'A', 'n' and 'Ea' are left for pandas to infer.
# The extra `na_values` make cells such as 'None' or 'NA' read as missing.
data = pd.read_csv(
    csv_file_name,
    dtype={
        'year': str,
        'pages': str,
        'volume': str,
        'journal': str,
        'article title': str,
        'Author list': str,
        'short description': str,
        'long description': str,
        'reactant 1 SMILES': str,
        'reactant 2 SMILES': str,
        'product 1 SMILES': str,
        'product 2 SMILES': str,
    },
    skipinitialspace=True,
    na_values=['None', 'none', 'NA', 'Na'],
    names=[
        u'reactant 1 SMILES', u'reactant 2 SMILES',
        u'product 1 SMILES', u'product 2 SMILES',
        u'A', u'n', u'Ea',
        u'short description', u'long description',
        u'Author list', u'article title', u'journal',
        u'volume', u'pages', u'year',
    ],
    header=0,
)

#kinetics parameters
AunitsBimolecular = 'cm^3/(mol*s)'
AunitsUnimolecular = 's^-1'
EaUnits = 'kcal/mol'
A = [18.06, 26.3, 18.4]
n = [3.27, 3.24, 3.27]
Ea = [6.85, 7.03, 7.15]

#write comments
shortDesc0 = u"""CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d)"""
longDesc0 = u"""
Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at
B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate
coefficient computed TST with Eckart Tunnelling"
"""
reference0 = Article(
authors=["K. Wang", "S. Villano", "A. Dean"],
title=u'Reactions of allylic radicals that impact molecular weight growth kinetics',
journal="Phys. Chem. Chem. Phys.",
volume="17",
pages="""6255-6273""",
year="2015",
)
# flexible lists so that different entries can have different comments
# if you want no shortDesc or longDesc, you should use empty strings for Desc
shortDescList= [shortDesc0, '', '']
longDescList = [longDesc0, '', '']
# if you want no reference you should use None
referenceList = [reference0, None, None]


#done with inputs
#################################################################################################################
#check list lengths (useful check for some assurance you've entered things correctly)
length = len(react1smiles)
assert len(react2smiles) == length, "react2smiles has a different length than other lists"
assert len(product1smiles) == length, "product1smiles has a different length than other lists"
assert len(product2smiles) == length, "product2smiles has a different length than other lists"
assert len(A) == length, "A has a different length than other lists"
assert len(n) == length, "n has a different length than other lists"
assert len(Ea) == length, "Ea has a different length than other lists"
assert len(shortDescList) == length, "shortDescList has a different length than other lists"
assert len(longDescList) == length, "longDescList has a different length than other lists"
assert len(referenceList) == length, "referenceList has a different length than other lists"

#create entries in the library
speciesDict={}
for index, r1smiles in enumerate(react1smiles):
for index in data.index:
series = data.iloc[index]

bimolecular = False
#example item
r2smiles = react2smiles[index]
p1smiles = product1smiles[index]
p2smiles = product2smiles[index]

#make species
r1 = Species().fromSMILES(r1smiles)
p1 = Species().fromSMILES(p1smiles)
r1 = Species().fromSMILES(series['reactant 1 SMILES'])
p1 = Species().fromSMILES(series['product 1 SMILES'])
r2 = None
p2 = None
if r2smiles: r2 = Species().fromSMILES(r2smiles)
if p2smiles: p2 = Species().fromSMILES(p2smiles)
if pd.notnull(series['reactant 2 SMILES']):
r2 = Species().fromSMILES(series['reactant 2 SMILES'])
if pd.notnull(series['product 2 SMILES']):
p2 = Species().fromSMILES(series['product 2 SMILES'])

#make species labels
changeDict={} #necessary in case any species already exists in the speciesDict (isomorphic not same instance)
Expand Down Expand Up @@ -138,32 +115,53 @@
newReaction = Reaction(reactants = [r1], products = [p1])

#set Arrhenius

newArrhenius = Arrhenius()
if bimolecular:
Ainstance = ScalarQuantity(A[index], AunitsBimolecular)
Ainstance = ScalarQuantity(series['A'], AunitsBimolecular)
else:
Ainstance = ScalarQuantity(A[index], AunitsUnimolecular)
Ainstance = ScalarQuantity(series['A'], AunitsUnimolecular)
newArrhenius = Arrhenius(A = Ainstance,
n = ScalarQuantity(n[index], ''),
Ea = ScalarQuantity(Ea[index], EaUnits))
n = ScalarQuantity(series['n'], ''),
Ea = ScalarQuantity(series['Ea'], EaUnits))

#create library entry
#create reference item for library entry
referenceType = None
if referenceList[index]: referenceType = re.sub('.*\.', '', str(referenceList[index].__class__))
reference = None
if pd.notnull(series['Author list']):
#separate authors
author_list = series['Author list'].split(';')
#remove white space
author_list = [re.sub('(\A\s|\s\Z)*','', author) for author in author_list]
reference = Article(
authors=author_list,
title=series['article title'],
journal=series['journal'],
volume=series['volume'],
pages=series['pages'],
year=series['year'],)
referenceType = re.sub('.*\.', '', str(reference.__class__))

#convert descriptions from nan to ''
if pd.notnull(series['short description']): shortDesc = series['short description']
else: shortDesc = ''
if pd.notnull(series['long description']): longDesc = series['long description']
else: longDesc = ''
#create library entry
newEntry = Entry(index= index,
label=str(newReaction),
item=newReaction,
parent=None,
children=None,
data=newArrhenius,
reference=referenceList[index],
reference=reference,
referenceType= referenceType,
shortDesc=shortDescList[index],
longDesc=longDescList[index],
shortDesc=shortDesc,
longDesc=longDesc,
rank=None,)

#add entry to library
library.entries[index] = newEntry

library.save(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/reactions.py'.format(libraryName)))
library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))
library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))
5 changes: 5 additions & 0 deletions scripts/test_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
reactant 1 SMILES, reactant 2 SMILES (optional), product 1 SMILES, product 2 SMILES (optional), A (cm3/mol-s or 1/s), n, Ea (kcal/mol), short description, long description, Author list (semicolon separated), article title, journal, volume, pages, year
CC=CC,[CH3],[CH2]C=CC,C,18.06,3.27,6.85,CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d),Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at+B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate coefficient computed TST with Eckart Tunnelling, K. Wang ; S. Villano;A. Dean ,Reactions of allylic radicals that impact molecular weight growth kinetics,Phys. Chem. Chem. Phys.,17,6255-6273,2015
C=C,[CH3],[CH]=C,C,26.3,3.24,3.27,,,None,,,,,
CCCCC[CH2],,C[CH]CCCC,None,18.4,7.03,7.15,,,,,,,,

0 comments on commit a54459a

Please sign in to comment.