# BA9C.py -- forked from weka511/bioinformatics
# Copyright (C) 2019-2020 Greenweaves Software Limited
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# BA9C Construct the Suffix Tree of a String
import argparse
from helpers import read_strings
from snp import SuffixTree
def check(Edges,Expected):
    """Compare two edge collections position by position after sorting.

    Prints the size of both collections, then one line for every position
    at which the two sorted sequences disagree, and finally the total
    number of disagreements found.

    Args:
        Edges: The edges actually produced (any sortable collection).
        Expected: The reference edges to compare against.

    Note:
        The sorted sequences are paired with zip(), so when the collections
        differ in size the surplus items of the longer one are not compared;
        the size line printed first is the only indication of that.
    """
    print (f'Expected = {len(Expected)} edges, actual={len(Edges)}')
    mismatches = 0
    for a,b in zip(sorted(Edges),sorted(Expected)):
        if a!=b:
            mismatches+=1
            print (f'Expected {b}, was {a}')
    # Interpolate the counter itself; a literal 0 here would always
    # report "0 mismatches" regardless of what the loop found.
    print(f'{mismatches} mismatches')
def compare_edges(Edges,Expected):
    """Print the items that occur in only one of two edge collections.

    Both collections are sorted and then merged in step. An item found only
    in Expected is printed as '<item>,-'; an item found only in Edges is
    printed as '-,<item>'. Items present on both sides print nothing.
    A header line with the two sizes is printed first.

    Args:
        Edges: The edges actually produced (any sortable collection).
        Expected: The reference edges to compare against.
    """
    print (f'Expected = {len(Expected)} edges, actual={len(Edges)}')
    # A private sentinel marks exhaustion, so that no real item (not even
    # the string '-') can be mistaken for the end of a sequence.
    done = object()
    expected = iter(sorted(Expected))
    edges = iter(sorted(Edges))
    # Supplying a default keeps an empty collection from raising StopIteration.
    exp = next(expected,done)
    ed = next(edges,done)
    # Continue until BOTH sides are exhausted, so that items left over on
    # one side after the other runs out are still reported.
    while exp is not done or ed is not done:
        if ed is done or (exp is not done and exp<ed):
            print('{0},{1}'.format(exp,'-'))
            exp = next(expected,done)
        elif exp is done or ed<exp:
            print('{0},{1}'.format('-',ed))
            ed = next(edges,done)
        else:
            # Same item on both sides: nothing to report, advance both.
            exp = next(expected,done)
            ed = next(edges,done)
if __name__=='__main__':
    # Command-line driver. Each flag selects one dataset; the flags are
    # independent and are handled in the order sample, extra, rosalind.
    # NOTE(review): the string is passed as ArgumentParser's first positional
    # parameter, which argparse treats as `prog` -- confirm that is intended.
    cli = argparse.ArgumentParser('BA9C Construct the Suffix Tree of a String ')
    for flag,description in (
        ('--sample',   'process sample dataset'),
        ('--extra',    'process extra dataset'),
        ('--rosalind', 'process Rosalind dataset'),
    ):
        cli.add_argument(flag, default=False, action='store_true', help=description)
    options = cli.parse_args()

    if options.sample:
        # Fixed sample text: build its tree and list every collected edge.
        sample_tree = SuffixTree()
        sample_tree.build('ATAAATG$')
        for edge in sample_tree.collectEdges():
            print (edge)

    if options.extra:
        # The data file supplies two parts, unpacked here as the input and
        # the expected edges (presumably split by read_strings -- verify
        # against helpers.read_strings).
        text_lines,reference = read_strings('data/SuffixTreeConstruction.txt',init=0)
        extra_tree = SuffixTree()
        extra_tree.build(text_lines[0])
        #extra_tree.print()
        compare_edges(extra_tree.collectEdges(),reference)

    if options.rosalind:
        # Only the first string read from the file is used as the text.
        text_lines = read_strings(r'data/rosalind_ba9c.txt')
        rosalind_tree = SuffixTree()
        rosalind_tree.build(text_lines[0])
        for edge in rosalind_tree.collectEdges():
            print (edge)