From df7891526a0a0fcab5a08825436396af2d865f5b Mon Sep 17 00:00:00 2001
From: Matt Johnson <mjohnson541@gmail.com>
Date: Mon, 13 Feb 2017 15:39:01 -0500
Subject: [PATCH] Add a database test for unimolecular groups that have
 multiple trees. The test checks that end group labels are consistent
 throughout each end group tree, that the backbone has all labels present in
 the end groups plus labels tracing the shortest path between end groups, and
 that each end group subgraph in each entry of the backbone tree is the top
 level of the corresponding end group tree

---
 testing/databaseTest.py | 185 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 182 insertions(+), 3 deletions(-)

diff --git a/testing/databaseTest.py b/testing/databaseTest.py
index 6ebf49fb9c..abf9f6d414 100644
--- a/testing/databaseTest.py
+++ b/testing/databaseTest.py
@@ -80,6 +80,13 @@ def test_kinetics(self):
             self.compat_func_name = test_name
             yield test, family_name
             
+            if len(family.forwardTemplate.reactants)==1 and len(family.groups.top) != 1 and family_name != 'Diels_alder_addition':
+                test = lambda x: self.kinetics_checkUnimolecularGroups(family_name)
+                test_name = "Kinetics family {0} check that unimolecular group is formatted correctly?".format(family_name)
+                test.description = test_name
+                self.compat_func_name = test_name
+                yield test, family_name
+            
             for depository in family.depositories:
                 
                 test = lambda x: self.kinetics_checkAdjlistsNonidentical(depository)
@@ -95,7 +102,8 @@ def test_kinetics(self):
             test.description = test_name
             self.compat_func_name = test_name
             yield test, library_name
-        
+            
+            
     def test_thermo(self):
         for group_name, group in self.database.thermo.groups.iteritems():
             test = lambda x: self.general_checkNodesFoundInTree(group_name, group)
@@ -439,8 +447,179 @@ def kinetics_checkCdAtomType(self, family_name):
 The following adjList may have atoms in a different ordering than the input file:
 {4}
                                             """.format(family_name, entry, correctAtom, index+1, entry.item.toAdjacencyList()))
-
-
+    
+    def kinetics_checkUnimolecularGroups(self,family_name):
+        """
+        Check the group trees of a unimolecular family that has more than one top level node.
+
+        The first reactant of the forward template is assumed to be the backbone (contains the
+        whole reactant molecule) and the other top level nodes are assumed to be end groups.
+        The following are checked:
+        1)end group entries have the same labels as their top level entry
+        2)backbone groups have all labels that end groups have
+        3)backbone groups have labels tracing between the end groups that follow the shortest path
+        4)the backbone subgraph corresponding to each end group is the top level entry of the
+        corresponding end group for every end group
+
+        The path of check 3 is traced between the atoms labeled *1 and *3 (see midLabels).
+        Every violation found is collected, and all of them are reported together in one
+        assertion failure at the end, so a failure of one check does not hide the others.
+
+        :param family_name: key of the kinetics family in self.database.kinetics.families
+        :raises Exception: if a backbone entry cannot be split into its end groups
+        """
+        def find_shortest_path(start, end, path=None):
+            """
+            Exhaustive depth-first search for the shortest chain of bonded atoms.
+
+            :param start: atom to start from
+            :param end: atom to reach
+            :param path: atoms already visited on the way to start (not modified)
+            :return: list of atoms from the first visited atom to end, or None if end
+                     cannot be reached without revisiting an atom
+            """
+            path = (path or []) + [start]
+            if start == end:
+                return path
+
+            shortest = None
+            for node, _ in start.bonds.iteritems():
+                if node in path:
+                    continue
+                newpath = find_shortest_path(node, end, path)
+                if newpath and (shortest is None or len(newpath) < len(shortest)):
+                    shortest = newpath
+            return shortest
+
+        def getEndFromBackbone(backbone, endLabels):
+            """
+            :param backbone: :class: Entry for a backbone of molecule
+            :param endLabels: Labels in the end groups
+            :return: A subgraph representing the end group of the molecule
+            :raises Exception: if no atom carries one of endLabels, or if after splitting no
+                               fragment carries exactly the labels in endLabels
+            """
+            endLabels = set(endLabels)
+
+            #make copy for manipulation
+            copyGroup = backbone.item.copy(True)
+
+            #Find the first end group atom; only its bonds are cut below
+            midAtom = next((atom for atom in copyGroup.atoms if atom.label in endLabels), None)
+            if midAtom is None:
+                raise Exception("Group {0} has no atom labeled with any of {1}".format(
+                    backbone.label, sorted(endLabels)))
+
+            #find the bonds to break: those leading to atoms without an end group label
+            #(all are collected before the first one is removed)
+            bondsToBreak = [bond for atom2, bond in midAtom.bonds.iteritems()
+                            if atom2.label not in endLabels]
+            for bond in bondsToBreak:
+                copyGroup.removeBond(bond)
+
+            #split group into end and backbone fragment
+            fragments = copyGroup.split()
+
+            #verify group was split correctly and identify the correct end group
+            for fragment in fragments:
+                fragmentLabels = set(atom.label for atom in fragment.atoms if atom.label)
+                if fragmentLabels == endLabels:
+                    return fragment
+
+            #no fragment matched: put the diagnostics in the exception instead of printing them
+            adjLists = "\n".join(
+                fragment.toAdjacencyList(label=backbone.label) for fragment in fragments)
+            raise Exception("Group {0} not split correctly: expected labels {1}, fragments:\n{2}".format(
+                backbone.label, sorted(endLabels), adjLists))
+
+        def describe(header, errors):
+            """
+            Format one error category: the header, a column legend, then one line per error.
+
+            :param header: sentence naming the kind of error
+            :param errors: list of [root group, error group] pairs
+            :return: the multi-line text for this category
+            """
+            lines = [header + "\n [root group, error group]"]
+            lines.extend(str(x) for x in errors)
+            return "\n".join(lines)
+
+        family = self.database.kinetics.families[family_name]
+
+        #the first reactant of the forward template is taken as the backbone
+        backbone = family.forwardTemplate.reactants[0]
+
+        #every top level node that is not a template reactant is taken as an end group
+        endGroups = [entry for entry in family.groups.top if entry not in family.forwardTemplate.reactants]
+
+        #labels carried by the top level entry of each end group tree
+        endLabels = {}
+        for endGroup in endGroups:
+            endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)
+
+        #one atom from each end group
+        midLabels = ["*1", "*3"]
+
+        #define types of errors
+        A = [] #end groups have too many labels
+        B = [] #end group lacks necessary label
+        C = [] #backbone missing end group labels
+        D = [] #backbone missing labels in between groups
+        E = [] #backbone tries to define atoms inside end groups
+        for entry in family.groups.entries.itervalues():
+            if not isinstance(entry.item, Group):
+                continue
+            group = entry.item
+            #ancestors are looked up once and reused by every check on this entry
+            ancestors = family.ancestors(entry)
+
+            #labels on this entry, rebuilt for every entry so that nothing is carried over
+            #from the entry examined before it
+            presentLabels = set(atom.label for atom in group.atoms if atom.label)
+
+            if backbone in ancestors:
+                #Check C
+                for endGroup, labels in endLabels.iteritems():
+                    if not labels.issubset(presentLabels):
+                        C.append([endGroup, entry])
+
+                #check D (no path at all between the two atoms is reported as well)
+                midAtoms = [group.getLabeledAtom(x) for x in midLabels]
+                pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
+                if pathAtoms is None or not all(atom.label for atom in pathAtoms):
+                    D.append([backbone, entry])
+
+                #check E
+                for endGroup, labels in endLabels.iteritems():
+                    endFromBackbone = getEndFromBackbone(entry, labels)
+                    splitLabels = set(endFromBackbone.getLabeledAtoms().keys())
+                    if labels != splitLabels:
+                        raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
+                    if not endGroup.item.isIdentical(endFromBackbone):
+                        E.append([endGroup, entry])
+            else:
+                for endNode, labelledAtoms in endLabels.iteritems():
+                    if endNode not in ancestors:
+                        continue
+                    #Check A
+                    if not presentLabels.issubset(labelledAtoms):
+                        A.append([endNode, entry])
+                    #Check B
+                    if not labelledAtoms.issubset(presentLabels):
+                        B.append([endNode, entry])
+
+        #report: every category with errors goes into one message, so that a failure in an
+        #early category does not hide the later ones
+        reports = [
+            (A, "These end groups have extra labels that their top level end group do not have:"),
+            (B, "These end groups are missing labels that their top level end group have:"),
+            (C, "These backbone groups are missing labels that are in the end groups:"),
+            (D, "These backbone groups are missing labels along the path atoms:"),
+            (E, "These backbone have end subgraphs that don't match a root:"),
+        ]
+        failures = [describe(header, errors) for errors, header in reports if errors]
+        nose.tools.assert_true(not failures, "\n\n".join(failures))
+    
     def general_checkNodesFoundInTree(self, group_name, group):
         """
         This test checks whether nodes are found in the tree, with proper parents.