-
Notifications
You must be signed in to change notification settings - Fork 42
/
fasta_reverse_comp.py
executable file
·44 lines (35 loc) · 1.36 KB
/
fasta_reverse_comp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python
"""Extract all lines of a file found between a 'start' and a 'stop' markers.
Usage:
%program <input_file> <start_marker> <stop_marker> <output_file>"""
import sys
try:
from Bio import SeqIO
except:
print "This program requires the Biopython library"
sys.exit(0)
try:
in_file = open(sys.argv[1], "rU")
out_file = open(sys.argv[2], "w")
except:
print __doc__
sys.exit(0)
def complement(seq):
"""Return the complement of a sequence, *NOT* it's reverse complement"""
if not seq.isalpha():
print "The sequence contained non-alphabetic characters"
print seq
if not seq.isupper():
print "The sequence contained non capital-letter characters"
#seq = seq.upper()
seq = seq.replace("A","1").replace("T","2").replace("C","3").replace("G","4")
seq = seq.replace("a","5").replace("t","6").replace("c","7").replace("t","8")
seq = seq.replace("1","T").replace("2","A").replace("3","G").replace("4","C")
seq = seq.replace("5","t").replace("6","a").replace("7","g").replace("8","c")
return seq
def reverse_complement(seq):
return complement(seq)[::-1]
sequences = ([seq.id, seq.seq.tostring()] for seq in SeqIO.parse(in_file, 'fasta'))
with open(sys.argv[2], "w") as out_file:
for seq in sequences:
out_file.write(">" + seq[0] + "\n" + reverse_complement(seq[1]) + "\n")