Skip to content

Commit

Permalink
implements issue #1
Browse files Browse the repository at this point in the history
  • Loading branch information
rhr committed Oct 4, 2013
1 parent dd5d267 commit aefe3d6
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
11 changes: 11 additions & 0 deletions examples/fetch_sequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Example: download GenBank nucleotide records for a few taxa and save
# them to a FASTA file.
import logging
# INFO level makes the progress messages logged by fetch_DNA_seqs visible.
logging.basicConfig(level=logging.INFO)

from ivy import genbank
# fetch_DNA_seqs asserts that genbank.email has been set before it runs.
genbank.email = '[email protected]'

# fetch_DNA_seqs joins the Entrez terms with OR (a OR b OR c OR ...)
queries = [
    '"Pedicularis rex"[organism]',
    'Phtheirospermum[organism]',
]
records = genbank.fetch_DNA_seqs(queries)
with open('myseqs.fasta', 'w') as fasta_out:
    genbank.SeqIO.write(records, fasta_out, 'fasta')
37 changes: 36 additions & 1 deletion ivy/genbank.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re, sys
import re, sys, logging
from collections import defaultdict
from itertools import izip_longest, ifilter
from Bio import Entrez, SeqIO
Expand Down Expand Up @@ -236,3 +236,38 @@ def trimpos(rec):
first = __FIRST.search(s).start()
last = __LAST.search(s).start()-1
return (first, last)

def fetch_DNA_seqs(terms, maxn=10000, batchsize=1000):
    """
    Search the Entrez nucleotide database and download the matching
    records in GenBank format.

    terms: sequence of search terms, quoted appropriately, with Entrez
           specifiers, e.g. ['"Mus musculus"[organism]']; the terms are
           combined with OR
    maxn: maximum number of sequences to return; a false value (None or
          0) means no limit
    batchsize: number of records requested per efetch call (must be >= 1)

    returns list of SeqRecord objects (never more than maxn when maxn is
    set)

    The module-level `email` must be set before calling; raises
    ValueError if batchsize is less than 1.
    """
    global email
    assert email, "set email!"
    if batchsize < 1:
        # A non-positive batch size would never advance through the results.
        raise ValueError("batchsize must be >= 1")
    Entrez.email = email

    # usehistory="y" makes the search return the WebEnv/QueryKey pair that
    # the efetch calls below use to page through the same result set.
    h = Entrez.esearch(db="nucleotide", term=" OR ".join(terms), usehistory="y")
    try:
        d = Entrez.read(h)
    finally:
        h.close()
    env = d['WebEnv']
    key = d['QueryKey']
    N = int(d['Count'])
    if maxn:
        N = min(N, maxn)
    logging.info('fetching %s sequences', N)

    seqs = []
    retstart = 0
    # Page through the window [0, N) by request offset rather than by the
    # number of records parsed, so a short or empty batch cannot stall the
    # loop.
    while retstart < N:
        # Shrink the final request so the total never exceeds N.
        want = min(batchsize, N - retstart)
        h = Entrez.efetch(
            db="nucleotide", rettype='gb', webenv=env, query_key=key,
            retstart=retstart, retmax=want
        )
        try:
            v = list(SeqIO.parse(h, "genbank"))
        finally:
            h.close()
        seqs.extend(v)
        logging.info('...fetched %s', len(seqs))
        retstart += want
    logging.info('...done')
    return seqs



0 comments on commit aefe3d6

Please sign in to comment.