-
Notifications
You must be signed in to change notification settings - Fork 14
/
graph_virus.py
110 lines (83 loc) · 3.96 KB
/
graph_virus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from Bio import Phylo
import pandas as pd
class Virus:
    """Namespace of helpers for laying out and drawing a phylogenetic tree.

    None of the functions read instance or class state, so they are all
    declared as static methods and can be called either as
    ``Virus.<name>(...)`` or on an instance.
    """

    # List of virus names; how it is consumed is not visible in this file.
    species = ['avian', 'dengue', 'ebola', 'flu', 'lassa', 'measles', 'mumps', 'zika']

    @staticmethod
    def get_x_coordinates(tree):
        """Associate an x-coordinate with every clade of ``tree``.

        Args:
            tree: object exposing ``depths()`` (presumably a Bio.Phylo tree).

        Returns:
            dict ``{clade: x-coord}`` where the x-coord is the clade's depth.
        """
        # tree.depths() maps tree clades to depths (by branch length): it
        # returns a dict {clade: depth} where clade runs over all Clade
        # instances of the tree and depth is the distance from root to clade.
        xcoords = tree.depths()
        # If there are no branch lengths (largest depth is 0/None),
        # assign unit branch lengths instead.
        if not max(xcoords.values()):
            xcoords = tree.depths(unit_branch_lengths=True)
        return xcoords

    @staticmethod
    def get_y_coordinates(tree, dist=1.3):
        """Associate a y-coordinate with every clade of ``tree``.

        Leaves are placed on rows ``maxheight - i * dist``, where
        ``maxheight`` is the number of leaves and ``i`` counts the leaves
        starting from the last one. Each internal clade is placed midway
        between its first and last child.

        Args:
            tree: object exposing ``count_terminals()``, ``get_terminals()``
                and ``root`` (presumably a Bio.Phylo tree).
            dist: vertical spacing between two consecutive leaves.

        Returns:
            dict ``{clade: y-coord}``.
        """
        maxheight = tree.count_terminals()  # number of tree leaves
        # Rows are defined by the tips/leaves.
        ycoords = {
            leaf: maxheight - i * dist
            for i, leaf in enumerate(reversed(tree.get_terminals()))
        }

        def calc_row(clade):
            # Children must be positioned before their parent can be centred.
            for subclade in clade:
                if subclade not in ycoords:
                    calc_row(subclade)
            ycoords[clade] = (ycoords[clade.clades[0]] +
                              ycoords[clade.clades[-1]]) / 2

        # A tree whose root has no children is a single leaf: nothing to centre.
        if tree.root.clades:
            calc_row(tree.root)
        return ycoords

    @staticmethod
    def get_clade_lines(orientation='horizontal', y_curr=0, x_start=0, x_curr=0, y_bot=0, y_top=0,
                        line_color='rgb(25,25,25)', line_width=0.5):
        """Define a shape of type 'line' for one branch segment.

        Args:
            orientation: ``'horizontal'`` or ``'vertical'``.
            y_curr: y position of a horizontal segment.
            x_start: left end of a horizontal segment.
            x_curr: right end of a horizontal segment, or the x position of
                a vertical segment.
            y_bot: start y of a vertical segment.
            y_top: end y of a vertical segment.
            line_color: colour string stored under ``line.color``.
            line_width: width stored under ``line.width``.

        Returns:
            dict describing the line (keys ``type``, ``layer``, ``line``,
            ``x0``, ``y0``, ``x1``, ``y1``).

        Raises:
            ValueError: if ``orientation`` is neither of the two values above.
        """
        branch_line = dict(type='line',
                           layer='below',
                           line=dict(color=line_color,
                                     width=line_width)
                           )
        if orientation == 'horizontal':
            branch_line.update(x0=x_start,
                               y0=y_curr,
                               x1=x_curr,
                               y1=y_curr)
        elif orientation == 'vertical':
            branch_line.update(x0=x_curr,
                               y0=y_bot,
                               x1=x_curr,
                               y1=y_top)
        else:
            raise ValueError("Line type can be 'horizontal' or 'vertical'")
        return branch_line

    @staticmethod
    def draw_clade(clade, x_start, line_shapes, line_color='rgb(15,15,15)', line_width=1, x_coords=0, y_coords=0):
        """Recursively draw the tree branches, down from the given clade.

        Args:
            clade: clade to draw; must be iterable over its children and
                expose them as ``clade.clades``.
            x_start: x position where the branch leading to ``clade`` starts.
            line_shapes: list that receives the generated line dicts
                (mutated in place; nothing is returned).
            line_color: colour applied to every segment of the subtree.
            line_width: width applied to every segment of the subtree.
            x_coords: mapping ``{clade: x}``. The ``0`` default is only a
                placeholder; a real mapping must be supplied.
            y_coords: mapping ``{clade: y}``. Same remark as ``x_coords``.
        """
        x_curr = x_coords[clade]
        y_curr = y_coords[clade]
        # Draw a horizontal line from start to here
        branch_line = Virus.get_clade_lines(orientation='horizontal', y_curr=y_curr, x_start=x_start,
                                            x_curr=x_curr, line_color=line_color, line_width=line_width)
        line_shapes.append(branch_line)
        if clade.clades:
            # Draw a vertical line connecting all children
            y_top = y_coords[clade.clades[0]]
            y_bot = y_coords[clade.clades[-1]]
            line_shapes.append(Virus.get_clade_lines(orientation='vertical', x_curr=x_curr, y_bot=y_bot,
                                                     y_top=y_top, line_color=line_color,
                                                     line_width=line_width))
            # Draw descendants, forwarding the styling so the whole subtree
            # is drawn with the colour and width the caller asked for.
            for child in clade:
                Virus.draw_clade(child, x_curr, line_shapes, line_color=line_color, line_width=line_width,
                                 x_coords=x_coords, y_coords=y_coords)

    @staticmethod
    def read_treefile(filename):
        """Read the tree stored in ``filename`` using the "newick" format."""
        return Phylo.read(filename, "newick")

    @staticmethod
    def read_metadata(filename):
        """Load the CSV file ``filename`` and return it as a DataFrame."""
        return pd.read_csv(filename)

    @staticmethod
    def create_title(virus, nb_genome):
        """Build the graph title for ``virus`` and its ``nb_genome`` genomes.

        The ``<br>`` inside the returned string is part of the title text.
        """
        return (f"Phylogeny of {virus} Virus<br>"
                f"{nb_genome} genomes colored according to region and country")