Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add training #125

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions scripts/convertKineticsLibraryToTrainingReactions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"from rmgpy.rmg.model import Species\n",
"from rmgpy import settings\n",
"from IPython.display import display, HTML, Image\n",
"import itertools\n",
"from base64 import b64encode\n",
"if compareKinetics:\n",
" import numpy as np\n",
Expand Down Expand Up @@ -91,6 +92,7 @@
"outputs": [],
"source": [
"reactionDict = {}\n",
"commentsDict = {} #key is a :class:Reaction object, value is a list of entry attributes [reference, referenceType, shortDesc, longDesc]\n",
"kineticLibrary = database.kinetics.libraries[libraryName]\n",
"\n",
"# html table settings\n",
Expand All @@ -102,23 +104,24 @@
" lib_rxn.kinetics = entry.data \n",
" lib_rxn.index = entry.index\n",
" lib_rxn.kinetics.comment = entry.label # Assign the entry's label to the comment\n",
" commentsDict[lib_rxn] = [entry.reference, entry.referenceType, entry.shortDesc, entry.longDesc] #save comments to add to training\n",
" # Let's make RMG try to generate this reaction from the families!\n",
" fam_rxn_list = []\n",
" rxt_mol_mutation_num = 1\n",
" pdt_mol_mutation_num = 1\n",
" for reactant in lib_rxn.reactants:\n",
" rxt_mol_mutation_num *= len(reactant.molecule)\n",
"\n",
" for product in lib_rxn.products:\n",
" pdt_mol_mutation_num *= len(product.molecule)\n",
"\n",
" for mutation_i in range(rxt_mol_mutation_num):\n",
" rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n",
" pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n",
" \n",
" #get list of all possible resonance structures of reactant molecules\n",
" if len(lib_rxn.reactants) > 1:\n",
" rxt_resonances = [rxt.molecule for rxt in lib_rxn.reactants]\n",
" rxts_cross = list(itertools.product(*rxt_resonances))\n",
" rxts_cross = [list(cross) for cross in rxts_cross]\n",
" else:\n",
" rxts_cross = [[mol] for mol in lib_rxn.reactants[0].mol] #should have only one reactant\n",
" \n",
" #use families to try find reactions\n",
" for rxts_mol in rxts_cross:\n",
" pdts_mol = [spc.molecule[0] for spc in lib_rxn.products] #do we also need to cross through products.mol\n",
" fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n",
" reactants=rxts_mol, products=pdts_mol))\n",
"\n",
"\n",
" if len(fam_rxn_list) == 1:\n",
" fam_rxn = fam_rxn_list[0]\n",
"\n",
Expand Down Expand Up @@ -343,7 +346,18 @@
" reactions = reactionDict[familyName]\n",
" print 'reactions.py previously has {} rxns. Now adding {} new rxn(s).'.format(len(trainingDatabase.entries.values()), len(reactions))\n",
" print '================='\n",
" kineticFamily.saveTrainingReactions(reactions, shortDesc='Training reaction from kinetics library: {0}'.format(libraryName))"
" for reaction in reactions:\n",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reminder to self: I wrote this for loop workaround to solve the issue that family.saveTrainingReactions doesn't seem to allow different meta-data (shortDesc, reference, referenceType, longDesc) per reactions. The code now labels every single reaction with the same index as well as creates an extra line break after every entry.
Some combination of this causes the training reactions to be un-usable without manual edits

Both of these are taken account into the family. saveTrainingReactions. My initial thoughts is to change this function to allow different meta-data rather than trying to debug the work-around.

" shortDesc = ''\n",
" #If the library entry has a shortDesc and longDesc to the training reaction \n",
" if commentsDict[reaction][2]:\n",
" shortDesc = commentsDict[reaction][2]\n",
" reaction.kinetics.comment = str(commentsDict[reaction][3])\n",
" else:\n",
" shortDesc = 'Training reaction from kinetics library: {0}'.format(libraryName)\n",
" kineticFamily.saveTrainingReactions([reaction],\n",
" reference = commentsDict[reaction][0],\n",
" referenceType = commentsDict[reaction][1],\n",
" shortDesc=shortDesc)"
]
},
{
Expand Down Expand Up @@ -530,7 +544,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
Expand All @@ -544,7 +558,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
"version": "2.7.13"
}
},
"nbformat": 4,
Expand Down
169 changes: 169 additions & 0 deletions scripts/createKineticsLibrary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from rmgpy.rmg.model import Species
from rmgpy import settings
from rmgpy.data.kinetics.library import KineticsLibrary

from rmgpy.data.reference import *
import os.path
from rmgpy.kinetics.arrhenius import Arrhenius
from rmgpy.quantity import ScalarQuantity
from rmgpy.data.base import Entry
from rmgpy.reaction import Reaction
import re
"""
The purpose of this script is to create a reaction library from the least amount of input.

Might eventually make it so it reads in a csv instead of hard-coding inputs below.

Currently the numbers below are all made up, so they should not be used by any means.

For the inputs, every variable that is a list has elements where the indexes correspond with entry in another list
For example, one complete data entry for Arrhenius parameters would be A[i], n[i], and Ea[i]
"""

#initalize library variables
libraryName = 'test'
library = KineticsLibrary()
library.name = libraryName
library.label =libraryName
library.shortDesc = "Library with made up rates"
library.longDesc = """
This library is a test created to showcase the createKineticsLibrary.py script
"""

#smiles for reactants and products
#can use empty string of None for unimolecular reactions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change 'of' to 'or'

react1smiles = ["CC=CC", "C=C", 'CCCCC[CH2]']
react2smiles = ['[CH3]', '[CH3]', '']
product1smiles = ["[CH2]C=CC", "[CH]=C", "C[CH]CCCC"]
product2smiles = ['C', 'C', None]

#kinetics parameters
AunitsBimolecular = 'cm^3/(mol*s)'
AunitsUnimolecular = 's^-1'
EaUnits = 'kcal/mol'
A = [18.06, 26.3, 18.4]
n = [3.27, 3.24, 3.27]
Ea = [6.85, 7.03, 7.15]

#write comments
shortDesc0 = u"""CBS-QB3 calculation with 1-d rotor treatment at B3LYP/631G(d)"""
longDesc0 = u"""
Quantum chemistry calculations CBS-QB3 calculation with 1-d rotor treatment at
B3LYP/631G(d)" using Gaussian 03 and Gaussian 09. High-pressure-limit rate
coefficient computed TST with Eckart Tunnelling"
"""
reference0 = Article(
authors=["K. Wang", "S. Villano", "A. Dean"],
title=u'Reactions of allylic radicals that impact molecular weight growth kinetics',
journal="Phys. Chem. Chem. Phys.",
volume="17",
pages="""6255-6273""",
year="2015",
)
# flexible lists so that different entries can have different comments
# if you want no shortDesc or longDesc, you should use empty strings for Desc
shortDescList= [shortDesc0, '', '']
longDescList = [longDesc0, '', '']
# if you want no reference you should use None
referenceList = [reference0, None, None]

#done with inputs
#################################################################################################################
#check list lengths (useful check for some assurance you've entered things correctly)
length = len(react1smiles)
assert len(react2smiles) == length, "react2smiles has a different length than other lists"
assert len(product1smiles) == length, "product1smiles has a different length than other lists"
assert len(product2smiles) == length, "product2smiles has a different length than other lists"
assert len(A) == length, "A has a different length than other lists"
assert len(n) == length, "n has a different length than other lists"
assert len(Ea) == length, "Ea has a different length than other lists"
assert len(shortDescList) == length, "shortDescList has a different length than other lists"
assert len(longDescList) == length, "longDescList has a different length than other lists"
assert len(referenceList) == length, "referenceList has a different length than other lists"

#create entries in the library
speciesDict={}
for index, r1smiles in enumerate(react1smiles):
bimolecular = False
#example item
r2smiles = react2smiles[index]
p1smiles = product1smiles[index]
p2smiles = product2smiles[index]

#make species
r1 = Species().fromSMILES(r1smiles)
p1 = Species().fromSMILES(p1smiles)
r2 = None
p2 = None
if r2smiles: r2 = Species().fromSMILES(r2smiles)
if p2smiles: p2 = Species().fromSMILES(p2smiles)

#make species labels
changeDict={} #necessary in case any species already exists in the speciesDict (isomorphic not same instance)
for spec in [r1, r2, p1, p2]:
if spec:
for speciesLabel, existingSpecies in speciesDict.iteritems():
if spec.isIsomorphic(existingSpecies):
changeDict[spec] = existingSpecies #species already in speciesDict
break
else: #species is new
changeDict[spec] = spec
spec_formula = spec.molecule[0].getFormula()
if spec_formula not in speciesDict:
spec.label = spec_formula
else:
duplicateNumber = 2
while (spec_formula + '-{}'.format(duplicateNumber)) in speciesDict:
duplicateNumber += 1
spec.label = spec_formula + '-{}'.format(duplicateNumber)
speciesDict[spec.label] = spec

#Remake species accounting for duplicates
r1 = changeDict[r1]
p1 = changeDict[p1]
if r2: r2 = changeDict[r2]
if p2: p2 = changeDict[p2]

#create reaction
newReaction = None
if r2:
bimolecular = True
if p2:
newReaction = Reaction(reactants = [r1, r2], products = [p1, p2])
else:
newReaction = Reaction(reactants = [r1, r2], products = [p1])
elif p2:
newReaction = Reaction(reactants = [r1], products = [p1, p2])
else:
newReaction = Reaction(reactants = [r1], products = [p1])

#set Arrhenius
newArrhenius = Arrhenius()
if bimolecular:
Ainstance = ScalarQuantity(A[index], AunitsBimolecular)
else:
Ainstance = ScalarQuantity(A[index], AunitsUnimolecular)
newArrhenius = Arrhenius(A = Ainstance,
n = ScalarQuantity(n[index], ''),
Ea = ScalarQuantity(Ea[index], EaUnits))

#create library entry
referenceType = None
if referenceList[index]: referenceType = re.sub('.*\.', '', str(referenceList[index].__class__))
newEntry = Entry(index= index,
label=str(newReaction),
item=newReaction,
parent=None,
children=None,
data=newArrhenius,
reference=referenceList[index],
referenceType= referenceType,
shortDesc=shortDescList[index],
longDesc=longDescList[index],
rank=None,)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it important to have rank in the inputted data?


#add to entry to library
library.entries[index] = newEntry

library.save(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/reactions.py'.format(libraryName)))
library.saveDictionary(os.path.join(settings['database.directory'],'kinetics/libraries/{0}/dictionary.txt'.format(libraryName)))