Skip to content

Commit

Permalink
addition of a database test for unimolecular groups having multiple t…
Browse files Browse the repository at this point in the history
…rees, checks that end group labels are consistent throughout each end group tree, that the backbone has all labels present in the end groups and labels indicating the shortest path between end groups, and that each end group subgraph in each entry in the backbone tree is the top level of the corresponding end group tree
  • Loading branch information
mjohnson541 authored and nyee committed Feb 19, 2017
1 parent 76407af commit df78915
Showing 1 changed file with 182 additions and 3 deletions.
185 changes: 182 additions & 3 deletions testing/databaseTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ def test_kinetics(self):
self.compat_func_name = test_name
yield test, family_name

if len(family.forwardTemplate.reactants)==1 and len(family.groups.top) != 1 and family_name != 'Diels_alder_addition':
test = lambda x: self.kinetics_checkUnimolecularGroups(family_name)
test_name = "Kinetics family {0} check that unimolecular group is formatted correctly?".format(family_name)
test.description = test_name
self.compat_func_name = test_name
yield test, family_name

for depository in family.depositories:

test = lambda x: self.kinetics_checkAdjlistsNonidentical(depository)
Expand All @@ -95,7 +102,8 @@ def test_kinetics(self):
test.description = test_name
self.compat_func_name = test_name
yield test, library_name



def test_thermo(self):
for group_name, group in self.database.thermo.groups.iteritems():
test = lambda x: self.general_checkNodesFoundInTree(group_name, group)
Expand Down Expand Up @@ -439,8 +447,179 @@ def kinetics_checkCdAtomType(self, family_name):
The following adjList may have atoms in a different ordering than the input file:
{4}
""".format(family_name, entry, correctAtom, index+1, entry.item.toAdjacencyList()))



def kinetics_checkUnimolecularGroups(self,family_name):
    """
    Check the group trees of a unimolecular family that has more than one top level node.

    The first reactant of the forward template is assumed to be the backbone (it contains
    the whole reactant molecule) and every other top level node is assumed to be an end group.
    The following are checked:
    A) end group entries carry no label that their top level entry lacks
    B) end group entries carry every label of their top level entry
    C) backbone groups have all labels that the end groups have
    D) backbone groups are labelled on every atom of the shortest path between the
       two atoms named in ``midLabels`` (one atom from each end group)
    E) the backbone subgraph corresponding to each end group is identical to the top
       level entry of that end group

    All failing categories are collected and reported together in one nose assertion.

    :param family_name: key of the family in ``self.database.kinetics.families``
    :raises Exception: if a backbone group cannot be split into a fragment whose labels
                       match an end group
    """
    def find_shortest_path(start, end, path=None):
        """
        Depth-first search along ``bonds`` for the shortest chain of atoms from start to end.

        :param start: atom to search from
        :param end: atom to reach
        :param path: atoms already visited before ``start`` (used by the recursion only)
        :return: list of atoms up to and including ``end``, or None if ``end`` is unreachable
        """
        path = (path or []) + [start]
        if start == end:
            return path

        shortest = None
        for node in start.bonds:
            if node not in path:
                newpath = find_shortest_path(node, end, path)
                # strict '<' keeps the first shortest path found when several tie
                if newpath and (not shortest or len(newpath) < len(shortest)):
                    shortest = newpath
        return shortest

    def getEndFromBackbone(backbone, endLabels):
        """
        :param backbone: :class: Entry for a backbone of molecule
        :param endLabels: Labels in the end groups
        :return: A subgraph representing the end group of the molecule
        :raises Exception: if no atom carries an end label, or no fragment produced by
                           the split has exactly the labels in ``endLabels``
        """
        endLabels = set(endLabels)

        #make copy for manipulation
        copyGroup = backbone.item.copy(True)

        #Find the first atom that belongs to the end group
        midAtom = next((atom for atom in copyGroup.atoms if atom.label in endLabels), None)
        if midAtom is None:
            raise Exception("Group {0} has no atom labelled with any of {1}".format(backbone.label, sorted(endLabels)))

        #find the bonds to break: collected first because removing a bond while
        #iterating over midAtom.bonds would modify the mapping being iterated
        bondsToBreak = [bond for atom2, bond in midAtom.bonds.items()
                        if atom2.label is None or atom2.label not in endLabels]
        for bond in bondsToBreak:
            copyGroup.removeBond(bond)

        #split group into end and backbone fragment
        groups = copyGroup.split()

        #verify group was split correctly and identify the correct end group
        #(unlabelled atoms are ignored, the same way the end group labels are gathered)
        for group in groups:
            if set(atom.label for atom in group.atoms if atom.label) == endLabels:
                return group

        #no fragment matched: dump what was expected and what was produced
        print(endLabels)
        for group in groups:
            print(set(atom.label for atom in group.atoms if atom.label))
            print(group.toAdjacencyList(label=backbone.label))
        raise Exception("Group {0} not split correctly".format(backbone.label))

    family = self.database.kinetics.families[family_name]

    backbone = family.forwardTemplate.reactants[0]

    endGroups = [entry for entry in family.groups.top if entry not in family.forwardTemplate.reactants]

    #labels carried by the top level entry of each end group tree
    endLabels = {endGroup: set(atom.label for atom in endGroup.item.atoms if atom.label)
                 for endGroup in endGroups}

    #one atom from each end group
    midLabels = ["*1", "*3"]

    #define types of errors
    A = [] #end groups have too many labels
    B = [] #end group lacks necessary label
    C = [] #backbone missing end group labels
    D = [] #backbone missing labels in between groups
    E = [] #backbone tries to define atoms inside end groups
    for entry in family.groups.entries.values():
        if not isinstance(entry.item, Group):
            continue
        group = entry.item
        ancestors = family.ancestors(entry)
        #rebuilt for every entry so labels never leak from a previously visited group
        presentLabels = set(atom.label for atom in group.atoms if atom.label)

        if backbone in ancestors:
            #Check C
            for endGroup, labels in endLabels.items():
                if not labels.issubset(presentLabels):
                    C.append([endGroup, entry])
            #check D (an unreachable pair of atoms is reported as a failure as well)
            midAtoms = [group.getLabeledAtom(x) for x in midLabels]
            pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
            if not pathAtoms or any(not atom.label for atom in pathAtoms):
                D.append([backbone, entry])
            #check E
            for endGroup, labels in endLabels.items():
                endFromBackbone = getEndFromBackbone(entry, labels)
                fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                if labels != fragmentLabels:
                    raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
                if not endGroup.item.isIdentical(endFromBackbone):
                    E.append([endGroup, entry])
        else:
            for endNode, labelledAtoms in endLabels.items():
                if endNode in ancestors:
                    #Check A
                    if not presentLabels.issubset(labelledAtoms):
                        A.append([endNode, entry])
                    #Check B
                    if not labelledAtoms.issubset(presentLabels):
                        B.append([endNode, entry])

    #print outputs: every failing category goes into the same assertion message
    reports = [
        (A, "These end groups have extra labels that their top level end group do not have:"),
        (B, "These end groups are missing labels that their top level end group have:"),
        (C, "These backbone groups are missing labels that are in the end groups:"),
        (D, "These backbone groups are missing labels along the path atoms:"),
        (E, "These backbone have end subgraphs that don't match a root:"),
    ]
    sections = []
    for errors, header in reports:
        if errors:
            lines = [header + "\n [root group, error group]"]
            lines.extend(str(x) for x in errors)
            sections.append("\n".join(lines))
    if sections:
        nose.tools.assert_true(False, "\n\n".join(sections))


def general_checkNodesFoundInTree(self, group_name, group):
"""
This test checks whether nodes are found in the tree, with proper parents.
Expand Down

0 comments on commit df78915

Please sign in to comment.