-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubseq_index.py
25 lines (22 loc) · 1002 Bytes
/
subseq_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import bisect
class SubseqIndex(object):
""" Holds a subsequence index for a text T """
def __init__(self, t, k, ival):
self.k = k # num characters per subsequence extracted
self.ival = ival # space between them; 1=adjacent, 2=every other, etc
self.index = []
self.span = 1 + ival * (k - 1)
for i in range(len(t) - self.span + 1): # for each subseq
self.index.append((t[i:i+self.span:ival], i)) # add (subseq, offset)
self.index.sort() # alphabetize by subseq
def query(self, p):
""" Return index hits for first subseq of p """
subseq = p[:self.span:self.ival] # query with first subseq
i = bisect.bisect_left(self.index, (subseq, -1)) # binary search
hits = []
while i < len(self.index): # collect matching index entries
if self.index[i][0] != subseq:
break
hits.append(self.index[i][1])
i += 1
return hits