diff --git a/rmgpy/data/base.py b/rmgpy/data/base.py index 532e785e03..2d36a0615c 100644 --- a/rmgpy/data/base.py +++ b/rmgpy/data/base.py @@ -786,7 +786,7 @@ def descendants(self, node): descendants.extend(self.descendants(child)) return descendants - def isWellFormed(self): + def checkWellFormed(self): """ Return :data:`True` if the database is well-formed. A well-formed database has an entry in the dictionary for every entry in the tree, and @@ -797,47 +797,123 @@ def isWellFormed(self): nodes in the tree, if the tree is present; this is for databases with multiple trees, e.g. the kinetics databases. """ - - wellFormed = True + + + from rmgpy.data.kinetics.family import KineticsFamily + + #list of nodes that are not wellFormed + noGroup=[] + noMatchingGroup={} + notInTree=[] + notSubgroup=[] + probablyProduct=[] + + # Give correct arguments for each type of database + if isinstance(self, KineticsFamily): + library=self.rules.entries + groups=self.groups.entries + treeIsPresent=True + topNodes=self.getRootTemplate() # Make list of all nodes in library - libraryNodes = [] - for nodes in self.library: - libraryNodes.extend(nodes.split(';')) - libraryNodes = list(set(libraryNodes)) + libraryNodes=[] + libraryNodesSplit = [] + for nodes in library: + libraryNodes.append(nodes) + libraryNodesSplit.extend(nodes.split(';')) + libraryNodesSplit = list(set(libraryNodesSplit)) + - for node in libraryNodes: + try: + for node in libraryNodesSplit: # All nodes in library must be in dictionary - try: - if node not in self.entries: - raise DatabaseError('Node "{0}" in library is not present in dictionary.'.format(node)) - except DatabaseError, e: - wellFormed = False - logging.error(str(e)) - - # If a tree is present, all nodes in library should be in tree - # (Technically the database is still well-formed, but let's warn - # the user anyway - if len(self.tree.parent) > 0: - try: - if node not in self.tree.parent: - raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node)) - except DatabaseError, e: - logging.warning(str(e)) - - # If a tree is present, all nodes in tree must be in dictionary - if self.tree is not None: - for node in self.tree.parent: - try: - if node not in self.entries: - raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node)) - except DatabaseError, e: - wellFormed = False - logging.error(str(e)) - - return wellFormed + if node not in groups: + noGroup.append(node) + + #no point checking in tree if it doesn't even exist in groups + for libraryNode in libraryNodes: + nodes=libraryNode.split(';') + for libraryEntry in library[libraryNode]: + for node in nodes: + for libraryGroup in libraryEntry.item.reactants: + try: + if groups[node].item.isIsomorphic(libraryGroup): + break + except AttributeError: + if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr): + if groups[node].item==libraryGroup: + break + except TypeError: + print libraryGroup, type(libraryGroup) + except KeyError: + noGroup.append(node) + else: + noMatchingGroup[node]=libraryNode + + if treeIsPresent: + # All nodes need to be in the tree + # This is true when ascending through parents leads to a top node + for nodeName in groups: + ascendParent=self.groups.entries[nodeName] + + while ascendParent not in topNodes: + child=ascendParent + ascendParent=ascendParent.parent + if ascendParent is None or child not in ascendParent.children: + if child.index==-1: + probablyProduct.append(child.label) + break + else: + # If a group is not in a tree, we want to save the uppermost parent, not necessarily the original node + notInTree.append(child.label) + break + #check if child is actually subgroup of parent + ascendParent=self.groups.entries[nodeName].parent + if ascendParent is not None: + try: + if not ascendParent.item.isSubgraphIsomorphic(self.groups.entries[nodeName].item): + notSubgroup.append(nodeName) + except AttributeError: + if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr): + if groups[node].item==libraryGroup: + break + except TypeError: + print libraryGroup, type(libraryGroup) + # The adj list of each node actually needs to be subset of its parent's adjlist + #More to come later -nyee + except DatabaseError, e: + logging.error(str(e)) + +# # If a tree is present, all nodes in library should be in tree +# # (Technically the database is still well-formed, but let's warn +# # the user anyway +# if len(self.tree.parent) > 0: +# try: +# if node not in self.tree.parent: +# raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node)) +# except DatabaseError, e: +# logging.warning(str(e)) +# +# # If a tree is present, all nodes in tree must be in dictionary +# if self.tree is not None: +# for node in self.tree.parent: +# try: +# if node not in self.entries: +# raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node)) +# except DatabaseError, e: +# wellFormed = False +# logging.error(str(e)) + +# for libraryRule in library: + #check the groups + + #eliminate duplicates + noGroup=list(set(noGroup)) + notInTree=list(set(notInTree)) + + return (noGroup, noMatchingGroup, notInTree, notSubgroup, probablyProduct) def matchNodeToStructure(self, node, structure, atoms): """ @@ -988,6 +1064,18 @@ def matchToStructure(self,database,structure,atoms): return True != self.invert return False != self.invert + def matchToLogicOr(self, other): + """ + Is other the same LogicOr group as self? + """ + if len(self.components)!=len(other.components): + return False + else: + for node in self.components: + if node not in other.components: + return False + return True + def getPossibleStructures(self, entries): """ Return a list of the possible structures below this node. diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index 530c20b909..7162ac08c4 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -38,7 +38,7 @@ from copy import copy, deepcopy from rmgpy.data.base import Database, Entry, LogicNode, LogicOr, ForbiddenStructures,\ - ForbiddenStructureException, getAllCombinations + ForbiddenStructureException, getAllCombinations, DatabaseError from rmgpy.reaction import Reaction from rmgpy.kinetics import Arrhenius, ArrheniusEP, ThirdBody, Lindemann, Troe, \ PDepArrhenius, MultiArrhenius, MultiPDepArrhenius, \ @@ -1941,3 +1941,148 @@ def getRateCoefficientUnits(self): return 's^-1' else: raise ValueError('Unable to determine units of rate coefficient for reaction family "{0}".'.format(self.label)) + + def checkWellFormed(self): + """ + Returns a tuple of malformed database entries: + + noGroup is a list of nodes in the rules that has no corresponding group in + groups.py + + noMatchingGroup is a dictionary with entry labels from the rules as a key + and entry labels from groups as values. These are groups where rule.py's + adj list does not match group.py's. + + notInTree is a list of groups that do not appear in the tree + + notSubgroup is a dictionary with group labels as keys and atom indexes + as values. Each key is a group where the child's adj list is not a + true child of it's parent. The list of indexes corresponds to the + child's adj list index, where the atom is not a true child. + + probablyProduct is a list of groups which do not apepar in the + tree, but are probably products (as opposed to reactants) which + are created in the database loading. These are not necessarily + malformations, but because I'm not certain where they came from, + I decided to list them. + """ + + + #A function to add to the not in Subgroup dictionary + def appendToDict(dictionary, key, value): + if key not in dictionary: + dictionary[key]=[value] + else: + dictionary[key].append(value) + return dictionary + + #list of nodes that are not wellFormed + noGroup=[] + noMatchingGroup={} + tempNoMatchingGroup={} + notInTree=[] + notUnique={} + notSubgroup={} + probablyProduct=[] + + # Give correct arguments for each type of database +# if isinstance(self, KineticsFamily): + library=self.rules.entries + groups=self.groups.entries + groupsCopy=copy(groups) + topNodes=self.getRootTemplate() + + # Make list of all node names in library + libraryNodes=[] + for nodes in library: + libraryNodes.append(nodes) + + try: + #Each label in rules.py should be be in the form group1;group2;group3 etc + #and each group must appear in groups.py + for libraryNode in libraryNodes: + nodes=libraryNode.split(';') + for libraryEntry in library[libraryNode]: + for nodeName in nodes: + if nodeName not in groups: + noGroup.append(nodeName) + #If the node is not in the dictionary, we can't do the rest of the check + continue + #Each adj list in rules.py should match the adj list in group's.py + for libraryGroup in libraryEntry.item.reactants: + #break if we find a match between two groups + if isinstance(groups[nodeName].item, Group) and isinstance(libraryGroup, Group): + if groups[nodeName].item.isIsomorphic(libraryGroup): + break + #break if we find a match between two logic nodes + elif isinstance(groups[nodeName].item, LogicOr) and isinstance(libraryGroup, LogicOr): + if groups[nodeName].item.matchToLogicOr(libraryGroup): + break + #Otherwise no match is found, so we add it to the tempNoMatchingGroup + else: + tempNoMatchingGroup=appendToDict(tempNoMatchingGroup, libraryNode, nodeName) + #eliminate duplicates + for key, nodeList in tempNoMatchingGroup.iteritems(): + noMatchingGroup[key]=list(set(nodeList)) + + # Each group in groups.py should appear in the tree + # This is true when ascending through parents leads to a top node + for nodeName in groups: + nodeGroup=self.groups.entries[nodeName] + nodeGroupItem=nodeGroup.item + ascendParent=nodeGroup + while ascendParent not in topNodes: + child=ascendParent + ascendParent=ascendParent.parent + if ascendParent is None or child not in ascendParent.children: + if child.index==-1: + probablyProduct.append(child.label) + break + else: + # If a group is not in a tree, we want to save the uppermost parent, not necessarily the original node + notInTree.append(child.label) + break + + #each node should also be unique: + del groupsCopy[nodeName] + for nodeName2 in groupsCopy: + nodeGroup2Item=self.groups.entries[nodeName2].item + if isinstance(nodeGroup2Item, Group) and isinstance(nodeGroupItem, Group): + if nodeGroupItem.isIdentical(nodeGroup2Item): + notUnique=appendToDict(notUnique, nodeName, nodeName2) + if isinstance(nodeGroup2Item, LogicOr) and isinstance(nodeGroupItem, LogicOr): + if nodeGroupItem.matchToLogicOr(nodeGroup2Item): + notUnique=appendToDict(notUnique, nodeName, nodeName2) + + #For a correct child-parent relationship, each atom in the parent should have a corresponding child atom in the child. + nodeParent=nodeGroup.parent + #Atoms may be in a different order initially. Need to sort both child and parent first + #Don't need to do check for topNodes + if nodeParent is not None: + if isinstance(nodeParent.item, LogicOr): + if not nodeGroup.label in nodeParent.item.components: + #-1 index means the child is not in the LogicOr + notSubgroup[nodeName]=nodeParent.label + continue + else: + #if the parent is a LogicOr, we want to keep ascending until we get to a group or hit a discontinuity (could be + #malformed tree or just ascending past the top node) + while isinstance(nodeParent.item, LogicOr): + nodeParent=nodeParent.parent + if nodeParent == None: break + if nodeParent == None: continue +# nodeParent.item.sortAtoms() + elif isinstance(nodeGroup.item, LogicOr): + print nodeGroup, ' is an intermediate LogicOr. See if it can be replaced with a adj list.' + continue + #If both the parent and child are graphs, we can use the function isSubgroupIsomorphic if it is actually a child + if not nodeGroup.item.isSubgraphIsomorphic(nodeParent.item): + notSubgroup[nodeName]=nodeParent.label + except DatabaseError, e: + logging.error(str(e)) + + #eliminate duplicates + noGroup=list(set(noGroup)) + notInTree=list(set(notInTree)) + + return (noGroup, noMatchingGroup, notInTree, notUnique, notSubgroup, probablyProduct) diff --git a/rmgpy/molecule/group.pxd b/rmgpy/molecule/group.pxd index 9939b85e07..efdc8bdde5 100644 --- a/rmgpy/molecule/group.pxd +++ b/rmgpy/molecule/group.pxd @@ -129,3 +129,5 @@ cdef class Group(Graph): cpdef bint isSubgraphIsomorphic(self, Graph other, dict initialMap=?) except -2 cpdef list findSubgraphIsomorphisms(self, Graph other, dict initialMap=?) + + cpdef bint isIdentical(self, Graph other) diff --git a/rmgpy/molecule/group.py b/rmgpy/molecule/group.py index b6f83e501c..33f6015a85 100644 --- a/rmgpy/molecule/group.py +++ b/rmgpy/molecule/group.py @@ -320,6 +320,14 @@ def equivalent(self, other): if radical1 == radical2 and spin1 == spin2: break else: return False + #Each charge in self must have an equivalent in other + for charge1 in self.charge: + for charge2 in self.charge: + if charge1 == charge2: break + else: + return False + #The label must be the same + if not self.label==other.label: return False # Otherwise the two atom groups are equivalent return True @@ -349,9 +357,17 @@ def isSpecificCaseOf(self, other): if radical1 == radical2 and spin1 == spin2: break else: return False + #Each charge in self must have an equivalent in other + for charge1 in self.charge: + for charge2 in self.charge: + if charge1 == charge2: break + else: + return False + + #The label must be the same + if not self.label==other.label: return False # Otherwise self is in fact a specific case of other return True - ################################################################################ class GroupBond(Edge): @@ -769,3 +785,26 @@ def findSubgraphIsomorphisms(self, other, initialMap=None): raise TypeError('Got a {0} object for parameter "other", when a Group object is required.'.format(other.__class__)) # Do the isomorphism comparison return Graph.findSubgraphIsomorphisms(self, other, initialMap) + + def isIdentical(self, other): + """ + Returns ``True`` if `other` is identical and ``False`` otherwise. + The function `isIsomorphic` respects wildcards, while this function + does not, make it more useful for checking groups to groups (as + opposed to molecules to groups) + + """ + # It only makes sense to compare a Group to a Group for full + # isomorphism, so raise an exception if this is not what was requested + if not isinstance(other, Group): + raise TypeError('Got a {0} object for parameter "other", when a Group object is required.'.format(other.__class__)) + # An identical group is always a child of itself and + # is the only case where that is true. Therefore + # if we do both directions of isSubgraphIsmorphic, we need + # to get True twice for it to be identical + if not self.isSubgraphIsomorphic(other): + return False + elif not other.isSubgraphIsomorphic(self): + return False + else: + return True