-
Notifications
You must be signed in to change notification settings - Fork 26
Conducting conflict analyses
Hopefully you have peyotl installed and if not check this out. So I assume that you have a working peyotl and you understand virtualenv (if you use it).
We can use what we have from the other tutorials to put this together for a simple analysis of conflict in a portion of the synthetic tree. This requires us to
- Get the trees for a clade and put them in a file
- Get a section of the synthetic tree and put it in a file
- Conduct a conflict analysis of the trees downloaded against the synthetic tree
- Present the results in a reasonable manner
All but the last two in this list have been done in the other tutorials, but we will put it all together.
Here I am going to make use of a very simple node class in python and a newick reader. The source for these files is below. Also, I am going to put all of the pieces required here in one file instead of having multiple (as you might have done for the previous tutorials). Of course, you can do what you like. I am also going to keep it as simple as possible so it is clear how to modify, instead of making it particularly refined or final. You can run this by throwing all these files in a directory and running python map_ot.py Lonicera 649885 > tree.nex
and then opening the tree.nex
in figtree. In this case, Lonicera
is my named clade of interest (to get a set of trees from the tree database) and 649885
is the specific ott id for the part of the synth tree I want. You could modify the script to only use the ott id of course. But here, I simple have it to show different ways of using the apis. To see how to use ottId to get studies of interest, see this tutorial page.
Here is the map_ot.py
from peyotl.api import APIWrapper
from peyotl.nexson_syntax import create_content_spec, \
get_ot_study_info_from_nexml, \
PhyloSchema
import sys
import codecs
import node
import tree_reader
import tree_utils
#get the synth tree based on ott_id
def get_synth_tree_bit(ott_id):
tm = APIWrapper().treemachine
x = tm.get_synthetic_tree(format='newick',ott_id=ott_id,max_depth=2)
return x['newick']
#reading the trees from the list of tree files
def read_trees(tree_names):
trees = []
for i in tree_names:
tf = open(str(i)+".tre", "r")
tl = tf.readline()
trees.append(tree_reader.read_tree_string(tl))
tf.close()
return trees
#will convert nexson to newick; much of this taken from scripts/nexson/nexson_newick.py
def convert_nexson_newick(inp, tid):
outfn = tid+".tre"
src_schema = None
out = codecs.open(outfn, mode='w', encoding='utf-8')
otu_label = 'ottid' # originallabel, ottid, otttaxonname
blob = inp
schema = create_content_spec(content='tree', content_id=tid, format='newick', otu_label=otu_label)
try:
schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
except KeyError:
if 'nexml' not in blob and 'nex:nexml' not in blob:
blob = blob['data']
schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
else:
raise
return
#just break down the tree into each node's the ingroups and outgroups
def get_biparts(tree):
ins = []
outs = []
rootnms = set(tree.lvsnms())
for i in tree.iternodes():
if len(i.children) == 0 or i == tree:
continue
tins = set(i.lvsnms())
ins.append(tins)
outs.append(rootnms.difference(tins))
return [ins,outs]
#map very simple conflict and the orcas for nodes
#conflict here is just calculated if there is an intersection between
#ingroup1 and outgroup2, ingroup2 and outgroup1, ingroup1 and ingroup2, and outgroup1 and outgroup2
def map_tree_into_stree(tree, treename, stree):
ins,outs = get_biparts(tree)
souts = set(stree.lvsnms())
for i in stree.iternodes():
if len(i.children) == 0 or i == stree:
continue
sin = i.data["otts"]
tsouts = i.data["roototts"]
for j,k in zip(ins,outs):
#conflict
if len(j.intersection(sin)) > 0 and len(j.intersection(tsouts)) > 0 \
and len(k.intersection(sin)) > 0 and len(k.intersection(tsouts)) > 0:
i.data["conflicts"].add(treename)
#mrca
else:
testnames = sin.intersection(j)
if len(testnames) < 2:
continue
testnodes = []
for m in stree.leaves():
if m.label.split("ott")[1] in testnames:
testnodes.append(m)
tree_utils.get_mrca(testnodes,stree).data["mrcas"].add(treename)
#present the results so they can be viewed in figtree
def get_figtree(tree):
for i in tree.iternodes():
if "conflicts" in i.data or "mrcas" in i.data:
i.label += "[&"
labdat = []
if "conflicts" in i.data:
labdat.append("conflict="+str(len(set(i.data["conflicts"]))))
if "mrcas" in i.data:
labdat.append("mrcas="+str(len(set(i.data["mrcas"]))))
i.label += ",".join(labdat)+"]"
retst = "#NEXUS\nbegin trees;\n\ttree tree1 = [&R] "
retst += tree.get_newick_repr(False)+";\nend;\n"
return retst
if __name__ == "__main__":
if len(sys.argv) != 3:
print "python "+sys.argv[0]+" taxonname ottid"
sys.exit(0)
a = APIWrapper()
oti = a.oti
ps = a.phylesystem_api
b = oti.find_trees(ottTaxonName=sys.argv[1])
tree_names = []
for i in b:
for j in i["matched_trees"]:
tr = j["nexson_id"]
st = "_".join(j["oti_tree_id"].split("_")[0:2])
nexson = ps.get(st)
convert_nexson_newick(nexson, tr)
tree_names.append(tr)
trees = read_trees(tree_names)
stree = get_synth_tree_bit(sys.argv[2])
sytree = tree_reader.read_tree_string(str(stree))
#preparing things
roototts = set([i.split("ott")[1] for i in sytree.lvsnms()])
for i in sytree.iternodes():
if i != sytree and len(i.children) > 0:
i.data["conflicts"] = set()
i.data["mrcas"] = set()
i.data["otts"] = set([j.split("ott")[1] for j in i.lvsnms()])
i.data["roototts"] = roototts.difference(i.data["otts"])
#do some basic conflict
for i in range(len(trees)):
map_tree_into_stree(trees[i],tree_names[i],sytree)
print get_figtree(sytree)
The other files needed for the above script include this simple node class in node.py
class Node:
def __init__(self):
self.label = ""
self.length = 0.0
self.parent = None
self.children = []
self.data = {}
self.istip = False
self.comment = ""
def add_child(self, child):
# make sure that the child is not already in there
assert child not in self.children
self.children.append(child)
child.parent = self
def remove_child(self, child):
# make sure that the child is in there
assert child in self.children
self.children.remove(child)
child.parent = None
def leaves(self, v=None):
if v is None:
v = []
if len(self.children) == 0:
v.append(self)
else:
for child in self.children:
child.leaves(v)
return v
def leaves_fancy(self):
return [n for n in self.iternodes() if n.istip]
def lvsnms(self):
return [n.label for n in self.iternodes() if n.istip]
def iternodes(self, order="preorder"):
if order.lower() == "preorder":
yield self
for child in self.children:
for d in child.iternodes(order):
yield d
if order.lower() == "postorder":
yield self
def get_newick_repr(self, showbl=False):
ret = ""
for i in range(len(self.children)):
if i == 0:
ret += "("
ret += self.children[i].get_newick_repr(showbl)
if i == len(self.children)-1:
ret += ")"
else:
ret += ","
if self.label is not None:
ret += self.label
if showbl is True:
ret += ":"+"{:.12g}".format(self.length)
return ret
This simple newick reader in tree_reader.py
from node import Node
def read_tree_string(instr):
root = None
index = 0
nextchar = instr[index]
start = True
keepgoing = True
curnode = None
while keepgoing is True:
if nextchar is "(":
if start is True:
root = Node()
curnode = root
start = False
else:
newnode = Node()
curnode.add_child(newnode)
curnode = newnode
elif nextchar is ',':
curnode = curnode.parent
elif nextchar is ")":
curnode = curnode.parent
index += 1
nextchar = instr[index]
name = ""
while True:
if nextchar is ',' or nextchar is ')' or nextchar is ':' \
or nextchar is ';' or nextchar is '[':
break
name += nextchar
index += 1
nextchar = instr[index]
curnode.label = name
index -= 1
elif nextchar is "[":
index += 1
nextchar = instr[index]
name = ""
while True:
if nextchar is "]":
break
name += nextchar
index += 1
nextchar = instr[index]
curnode.comment += name
elif nextchar == ';':
keepgoing = False
break
elif nextchar == ":":
index += 1
nextchar = instr[index]
brlen = ""
while True:
if nextchar is ',' or nextchar == ')' or nextchar is ':' \
or nextchar is ';' or nextchar is '[':
break
brlen += nextchar
index += 1
nextchar = instr[index]
curnode.length = float(brlen)
index -= 1
elif nextchar == ' ':
index += 1
nextchar = instr[index]
else: # this is an external named node
newnode = Node()
curnode.add_child(newnode)
curnode = newnode
curnode.istip = True
name = ""
while True:
if nextchar is ',' or nextchar == ')' or nextchar is ':' \
or nextchar is ';' or nextchar is '[':
break
name += nextchar
index += 1
nextchar = instr[index]
curnode.label = name
index -= 1
if index < len(instr) - 1:
index += 1
nextchar = instr[index]
return root
And this simple tree_utils.py
file
def get_mrca(nodes, tree):
traceback = []
first = nodes[0]
while first != tree:
first = first.parent
traceback.append(first)
if first.parent is None:
break
curmrca = nodes[0].parent
for i in nodes:
if i == nodes[0]:
continue
curmrca = mrca_recurs(curmrca, traceback, i)
return curmrca
def mrca_recurs(node1, path1, node2):
path = path1[path1.index(node1):]
parent = node2
mrca = None
while parent is not None:
if parent in path:
mrca = parent
break
parent = parent.parent
return mrca