-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFST_by_MAF_Uni.pl
executable file
·85 lines (75 loc) · 2.04 KB
/
FST_by_MAF_Uni.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl -w
#
# FST_by_MAF_Uni.pl
#
# A more universal script to calculate Fst based on minor allele frequencies
# Should work on any MAF file regardless of the number of populations
#
# October 3, 2013
use strict;
# open the input maf file and the output fst file
my ($USAGE) = "\n$0 <infile.maf> <outfile.fst> <npops>
\tinfile.maf = The input file with minor allele frequencies; created by snps2maf
\toutfile.fst = The output file with Fst calculated for every pair of populations
\tnpops = The number of populations in the file\n\n";
unless (@ARGV) {
print $USAGE;
exit;
}
my ($input, $output, $npops) = @ARGV;
open (IN, $input) || die "\nUnable to open the file $input!\n";
open (OUT, ">$output") || die "\nUnable to open the file $output!\n";
# Print a header line to the output file
print OUT "Locus";
for (my $n = 1; $n < $npops; $n++) {
for (my $p = $n+1; $p <= $npops; $p++) {
print OUT "\t", "Fst", $n, "_", $p;
}
}
print OUT "\n";
# Process the input file one line at a time
while (<IN>) {
chomp $_;
if ($_ =~ /^CHROM/) {
next;
}
my @info = split(/\t/, $_);
my @exp = ();
my @num = ();
for (my $i = 0; $i < $npops; $i++) {
my $maf = $info[$i+3];
my $num1 = $info[$i+3+$npops];
push (@num, $num1);
my $exp1 = 1 - (((1-$maf)**2) + ($maf**2));
push (@exp, $exp1);
}
my @Sub = ();
my @qbar = ();
my @Tot = ();
for (my $i = 0; $i < $npops-1; $i++) {
for (my $j = $i+1; $j < $npops; $j++) {
my $S = 0;
my $q = 0;
unless (($num[$i] + $num[$j]) == 0) {
$S = (($exp[$i] * $num[$i]) + ($exp[$j] * $num[$j]))/($num[$i] + $num[$j]);
$q = (($info[$i+3] * $num[$i]) + ($info[$j+3] * $num[$j]))/($num[$i] + $num[$j]);
}
my $t = 1 - (((1 - $q)**2) + ($q**2));
push (@Sub, $S);
push (@qbar, $q);
push (@Tot, $t);
}
}
print OUT $info[0];
for (my $y = 0; $y < scalar @Sub; $y++) {
my $fst = 0;
unless ($Tot[$y] == 0) {
$fst = ($Tot[$y] - $Sub[$y])/$Tot[$y];
}
print OUT "\t", $fst;
}
print OUT "\n";
}
close(IN);
close(OUT);
exit;