-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSeqUtils.pm
109 lines (93 loc) · 2.53 KB
/
SeqUtils.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package SeqUtils;
use strict;
use warnings;
use Exporter;
use Bio::SeqUtils;
use Bio::Seq;
use List::Util qw(sum);
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
$VERSION = 1.00;
@ISA = qw(Exporter);
@EXPORT = qw(has_nsf
entropy
);
sub has_nsf {
my ($seq) = @_;
unless (ref $seq eq 'Bio::Seq') {
# convert raw sequence into a Bio::Seq
$seq = Bio::Seq->new(-seq => $seq, -id => "foobar");
}
my $toReturn = 0;
if ($seq->alphabet eq 'dna') {
my @orfs_6frames = Bio::SeqUtils->translate_6frames($seq);
foreach my $orfseq (@orfs_6frames) {
# search for at least one frame that does not have a stop codon '*'
if ($orfseq->seq !~ /\*/) { # if you don't find an '*', the sequence contains an ORF throughout entire frame
$toReturn = 1;
}
}
}
return $toReturn;
}
sub entropy {
my ($seqn) = @_;
return unless ($seqn);
my $WINDOWSIZE = 64;
my $WINDOWSTEP = 32;
my @WINDOWSIZEARRAY = (0..61);
my $LOG62 = log(62);
my $ONEOVERLOG62 = 1/log(62);
my ($rest,$steps,@vals,$str,$num,$bynum);
my $length = length($seqn);
if($length <= $WINDOWSIZE) {
$rest = $length;
$steps = 0;
} else {
$steps = int(($length - $WINDOWSIZE) / $WINDOWSTEP) + 1;
$rest = $length - $steps * $WINDOWSTEP;
unless($rest > $WINDOWSTEP) {
$rest += $WINDOWSTEP;
$steps--;
}
}
$num = $WINDOWSIZE-2;
$bynum = 1/$num;
$num--;
my $mean = 0;
my $entropyval;
foreach my $i (0..$steps-1) {
$str = substr($seqn,($i * $WINDOWSTEP),$WINDOWSIZE);
my %counts = ();
foreach my $i (@WINDOWSIZEARRAY) {
$counts{substr($str,$i,3)}++;
}
$entropyval = 0;
foreach(values %counts) {
$entropyval -= ($_ * $bynum) * log($_ * $bynum);
}
push(@vals,($entropyval * $ONEOVERLOG62));
}
#last step
if($rest > 5) {
$str = substr($seqn,($steps * $WINDOWSTEP),$rest);
my %counts = ();
$num = $rest-2;
foreach my $i (0..($num - 1)) {
$counts{substr($str,$i,3)}++;
}
$entropyval = 0;
$bynum = 1/$num;
foreach(values %counts) {
$entropyval -= ($_ * $bynum) * log($_ * $bynum);
}
push(@vals,($entropyval / log($num)));
} else {
push(@vals,0);
}
$mean = &getArrayMean(@vals);
return $mean * 100;
}
sub getArrayMean {
return @_ ? sum(@_) / @_ : 0;
}
1;