-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubmitAnnotationSD2.py
67 lines (57 loc) · 2.69 KB
/
submitAnnotationSD2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import glob
import subprocess
import numpy as npy
import pandas as pd
from myconfig import *
from subprocess import Popen, PIPE
def maxSubmitReached(max):
p1 = Popen(["qstat", "-u", "fhormoz"], stdout=PIPE)
p2 = Popen(["wc", "-l"], stdin=p1.stdout, stdout=PIPE)
output = p2.communicate()[0]
if(int(output) < max) :
return False;
else:
return True;
TEMPLATE_SERIAL = """
#####################################
#!/bin/bash
#BSUB -n 1 #each job run on 1 core
#BSUB -W 06:00 #job run 2 hour
#BSUB -J {name}
#BSUB -o {logfile} #lsf output file
#BSUB -e {errfile} #lsf error file
#BSUB -q short #submit to "short" queue
#####################################
echo "------------------------------------------------------------------------"
echo "Job started on" `date`
echo "------------------------------------------------------------------------"
{script}
echo "------------------------------------------------------------------------"
echo "Job ended on" `date`
echo "------------------------------------------------------------------------"
"""
currentPath = os.getcwd();
for bedFiles in glob.glob(bedFolder + "/*.bed"):
bedFile = bedFiles.replace(bedFolder,"").replace(".bed","");
print bedFiles;
annotationSdOUTName = annotationsFolder + "/" + annotationsName+ "/" + bedFile + "/" + bedFile + "3.sd.out";
if os.path.exists(annotationSdOUTName):
continue;
print annotationSdOUTName;
dataAnnotGW = npy.array([], dtype=npy.float64);
for chrom in range(1,23,1):
annotationsFile = annotationsFolder + "/" + annotationsName+ "/" + bedFile + "/" + bedFile + "." + str(chrom) + ".l2.ldscore.gz";
freq1KGFile = "/groups/price/ldsc/reference_files/1000G_EUR_Phase3/plink_files/1000G.EUR.QC." + str(chrom) + ".frq";
annnotationsData = npy.genfromtxt(annotationsFile, dtype=None,skip_header=0, usecols=(1,3) )
annnotationsDataFram = pd.DataFrame(data=annnotationsData[1:,:], columns=['SNP', 'ANNOT']);
freq1KGData = npy.genfromtxt(freq1KGFile, dtype=None, skip_header=0, usecols=(1,4));
freq1KGDataFrame = pd.DataFrame(data=freq1KGData[1:,:], columns=['SNP', 'MAF']);
freq1KGannnotationsFrame = pd.merge(annnotationsDataFram, freq1KGDataFrame, on='SNP');
freq1KGannnotationsFrame[['ANNOT','MAF']] = freq1KGannnotationsFrame[['ANNOT','MAF']].apply(pd.to_numeric)
commonannnotations = freq1KGannnotationsFrame[freq1KGannnotationsFrame['MAF'] > 0.05];
commonannnotationsVal = commonannnotations['ANNOT'];
dataAnnotGW = npy.append(dataAnnotGW,commonannnotationsVal.as_matrix());
outfile = open(annotationSdOUTName, 'w');
outfile.write(str(npy.std(npy.sqrt(dataAnnotGW)))+"\n");
outfile.close();