-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadmixfrog2eigenstrat_v1.py
99 lines (76 loc) · 3.73 KB
/
admixfrog2eigenstrat_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 21 12:42:34 2019
@author: rtournebize
"""
import numpy as np
import argparse
import sys
import random
parser = argparse.ArgumentParser(description='Convert ADMIXFROG output to extended EIGENSTRAT format')
parser.add_argument('-f', '--input', type=str, required=True, help='File to convert.')
parser.add_argument('-o', '--outfilePrefix', type=str, required=True, help='Prefix of the output file.')
parser.add_argument('-s', '--snpFile', type=str, required=False, default=None, help='List of SNPs to retain (the intersection will be based on chromosome and physical positions).')
args = parser.parse_args()
input_prefix = args.input
output_prefix = args.outfilePrefix
snpFile = args.snpFile
if snpFile is not None:
targetSNP = np.genfromtxt(snpFile, dtype=str, usecols = (1,3))
targetSNP = set([targetSNP[x,0]+':'+targetSNP[x,1] for x in range(targetSNP.shape[0])])
SNP = np.genfromtxt(input_prefix, dtype=str, usecols = (0,1,2), delimiter=',', skip_header = 1)
print('Finding intersection using chromosome-position matches')
goodSNPs = [False]*SNP.shape[0]
for i in range(SNP.shape[0]):
snp = SNP[i,0]+':'+SNP[i,1]
if snp in targetSNP:
goodSNPs[i] = True
print('Exporting')
with open(output_prefix+'.snp', 'w') as fout:
for i in range(SNP.shape[0]):
if goodSNPs[i] is True:
fout.write('. '+str(SNP[i,0])+' '+SNP[i,2]+' '+SNP[i,1]+' N N\n')
with open(output_prefix+'_Morgans.snp', 'w') as fout:
for i in range(SNP.shape[0]):
if goodSNPs[i] is True:
fout.write('. '+str(SNP[i,0])+' '+str(float(SNP[i,2])*0.01)+' '+SNP[i,1]+' N N\n')
print('Initial number of SNPs: '+str(SNP.shape[0]))
print('Number of output SNPs: '+str(sum(goodSNPs)))
del SNP
GENO = np.genfromtxt(input_prefix, dtype=float, usecols = (5,6,7), delimiter=',', skip_header = 1)
with open(output_prefix+'.geno', 'w') as fout:
with open(output_prefix+'.glhood', 'w') as fout2:
for i in range(GENO.shape[0]):
if goodSNPs[i] is True:
gl = np.amax(GENO[i,:])
got = np.where(GENO[i,:] == gl)[0]
if len(got) != 1:
print('Two equally probable genotype, picking one random')
gt = int(random.choice(got))
else:
gt = int(got)
fout.write(str(gt)+'\n')
fout2.write(str(gl)+'\n')
with open(output_prefix+'.ind', 'w') as fout:
fout.write(output_prefix+' . '+output_prefix+'\n')
else:
SNP = np.genfromtxt(input_prefix, dtype=str, usecols = (0,1,2), delimiter=',', skip_header = 1)
print('Exporting')
with open(output_prefix+'.snp', 'w') as fout:
for i in range(SNP.shape[0]):
fout.write('. '+str(SNP[i,0])+' '+SNP[i,2]+' '+SNP[i,1]+' N N\n')
with open(output_prefix+'_Morgans.snp', 'w') as fout:
for i in range(SNP.shape[0]):
fout.write('. '+str(SNP[i,0])+' '+str(float(SNP[i,2])*0.01)+' '+SNP[i,1]+' N N\n')
del SNP
GENO = np.genfromtxt(input_prefix, dtype=float, usecols = (5,6,7), delimiter=',', skip_header = 1)
with open(output_prefix+'.geno', 'w') as fout:
with open(output_prefix+'.glhood', 'w') as fout2:
for i in range(GENO.shape[0]):
gl = np.amax(GENO[i,:])
gt = int(np.where(GENO[i,:] == gl)[0])
fout.write(str(gt)+'\n')
fout2.write(str(gl)+'\n')
with open(output_prefix+'.ind', 'w') as fout:
fout.write(output_prefix+' . '+output_prefix+'\n')