-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastaParser.py
49 lines (39 loc) · 1.56 KB
/
fastaParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
from Bio import SeqIO
import lmdb
import sys
import pprint
def readData(fileName):
    """Load the records of a FASTA file into two LMDB environments.

    For every record parsed from ``fileName`` the record's sequence is used
    as the key and the record's id as the value:

    * ``database`` environment: the pair is stored in a named sub-database
      whose name is ``fileName`` itself, opened with ``dupsort=True``.
    * ``masterdb`` environment: the sequence is stored in the default
      database with the placeholder value ``b"0"``.

    Both write transactions are used as context managers, as are the two
    environments, so the environments are closed when the function returns
    or raises.

    Args:
        fileName: Path of the FASTA file to read. It doubles as the name of
            the sub-database, so it must be ASCII-encodable.

    Raises:
        UnicodeEncodeError: If ``fileName``, a record id or a record
            sequence contains non-ASCII characters.
    """
    # Maximum size in bytes each environment's memory map may grow to.
    map_size = 5000000000
    placeholder = "0".encode("ascii")
    genome_id = fileName.encode("ascii")

    # Environments are context managers: leaving the block closes them,
    # which the original code never did.
    with lmdb.Environment("database", max_dbs=35, map_size=map_size) as env, \
            lmdb.Environment("masterdb", max_dbs=5, map_size=map_size) as masterenv:
        genome_db = env.open_db(genome_id, dupsort=True)
        # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11
        # and text mode already applies universal newlines on Python 3.
        with env.begin(write=True, db=genome_db) as genome_txn, \
                masterenv.begin(write=True) as master_txn, \
                open(fileName, "r") as handle:
            for record in SeqIO.parse(handle, "fasta"):
                # Encode once and reuse the key for both databases.
                key = str(record.seq).encode("ascii")
                value = str(record.id).encode("ascii")
                genome_txn.put(key, value)
                master_txn.put(key, placeholder, overwrite=True)
#print(env.stat()["entries"])
#print(masterenv.stat())
#db = lmdb.open("genomeData")
#with db.begin(write=True) as transaction:
# transaction.put("newkey".encode("ascii"), "newvalue".encode("ascii"))
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the FASTA file
    # to load. Exit with a usage message instead of an IndexError traceback
    # when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <fasta_file>".format(sys.argv[0]))
    readData(sys.argv[1])