-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubmitAnnotationSDBaseLine.py
72 lines (59 loc) · 2.74 KB
/
submitAnnotationSDBaseLine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import glob
import gzip
import subprocess
import numpy as npy
import pandas as pd
from subprocess import Popen, PIPE
def maxSubmitReached(max):
p1 = Popen(["qstat", "-u", "fhormoz"], stdout=PIPE)
p2 = Popen(["wc", "-l"], stdin=p1.stdout, stdout=PIPE)
output = p2.communicate()[0]
if(int(output) < max) :
return False;
else:
return True;
TEMPLATE_SERIAL = """
#####################################
#!/bin/bash
#BSUB -n 1 #each job run on 1 core
#BSUB -W 06:00 #job run 2 hour
#BSUB -J {name}
#BSUB -o {logfile} #lsf output file
#BSUB -e {errfile} #lsf error file
#BSUB -q short #submit to "short" queue
#####################################
echo "------------------------------------------------------------------------"
echo "Job started on" `date`
echo "------------------------------------------------------------------------"
{script}
echo "------------------------------------------------------------------------"
echo "Job ended on" `date`
echo "------------------------------------------------------------------------"
"""
currentPath = os.getcwd();
line = gzip.open("/groups/price/ldsc/reference_files/1000G_EUR_Phase3/baseline/baseline.1.annot.gz").readline();
numberCol = len(line.split());
annotationSdOUTName = "/groups/price/ldsc/reference_files/1000G_EUR_Phase3/baseline/baseline.sd.out";
if os.path.exists(annotationSdOUTName):
exit(0);
print annotationSdOUTName;
for col in range(5, numberCol):
dataAnnotGW = npy.array([], dtype=npy.float64);
for chrom in range(1,23,1):
annotationsFile = "/groups/price/ldsc/reference_files/1000G_EUR_Phase3/baseline/baseline." + str(chrom) + ".annot";
freq1KGFile = "/groups/price/ldsc/reference_files/1000G_EUR_Phase3/plink_files/1000G.EUR.QC." + str(chrom) + ".frq";
annnotationsData = npy.genfromtxt(annotationsFile, dtype=None,skip_header=0, usecols=(2,col));
annnotationsDataFram = pd.DataFrame(data=annnotationsData[1:,:], columns=['SNP', 'ANNOT']);
freq1KGData = npy.genfromtxt(freq1KGFile, dtype=None, skip_header=0, usecols=(1,4));
freq1KGDataFrame = pd.DataFrame(data=freq1KGData[1:,:], columns=['SNP', 'MAF']);
freq1KGannnotationsFrame = pd.merge(annnotationsDataFram, freq1KGDataFrame, on='SNP');
freq1KGannnotationsFrame[['ANNOT','MAF']] = freq1KGannnotationsFrame[['ANNOT','MAF']].apply(pd.to_numeric)
commonannnotations = freq1KGannnotationsFrame[freq1KGannnotationsFrame['MAF'] > 0.05];
commonannnotationsVal = commonannnotations['ANNOT'];
dataAnnotGW = npy.append(dataAnnotGW,commonannnotationsVal.as_matrix());
print chrom;
print npy.std(dataAnnotGW);
outfile = open(annotationSdOUTName, 'a');
outfile.write(str(npy.std(dataAnnotGW))+"\t");
outfile.close();