forked from marvinquiet/BART-WEB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexe_mb_pipeline.py
116 lines (94 loc) · 3.38 KB
/
exe_mb_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
import os, sys
import subprocess
import json
import shutil
import utils
from marge_bart import *
# Raise the interpreter's recursion limit to 20000 at import time, before main() runs.
# NOTE(review): the reason is not visible in this file -- presumably some code
# reached through marge_bart recurses deeply; confirm before changing the value.
sys.setrecursionlimit(20000)
# ======== load conf.yaml ========
# PROJECT_DIR = os.path.abspath(os.path.dirname(__file__))
# MARGE_DIR = ''
# BART_DIR = ''
# # default
# BART_CORE = 4
# MARGE_CORE = 4
# MARGE_REPEAT_TIMES = 3
# with open('conf.yaml', 'r') as fyaml:
# try:
# conf_data = yaml.load(fyaml)
# BART_DIR = conf_data['BART']['project_path']
# MARGE_DIR = conf_data['MARGE']['project_path']
# BART_CORE = conf_data['BART']['core']
# MARGE_CORE = conf_data['MARGE']['core']
# MARGE_REPEAT_TIMES = conf_data['MARGE']['repeat_times']
# except yaml.YAMLError as e:
# print (e)
# Spellings of a command-line token that mean "false" (compared case-insensitively).
_FALSE_TOKENS = ("", "0", "false", "f", "no", "n", "off", "none")

_USAGE = "usage: python exe_mb_pipeline.py <repeat_times> <user_key> <bart_flag: True/False>"


def _parse_bool_flag(value):
    """Interpret a command-line token as a boolean.

    ``bool("False")`` is ``True`` because every non-empty string is truthy, so
    the token is compared against the usual spellings of "false" instead.
    Any other value counts as true, which keeps tokens that enabled the flag
    before (``True``, ``1``, ``yes``, ...) working.
    """
    return str(value).strip().lower() not in _FALSE_TOKENS


def _collect_auc_scores(marge_dirs, files):
    """Read the AUC scores written into each MARGE output directory.

    For every directory in ``marge_dirs`` and every path in ``files`` the file
    ``margeoutput/regression/<basename without extension>_target_regressionInfo.txt``
    is scanned; each line containing ``'AUC = '`` contributes the first number
    found on that line.

    Returns:
        A ``(scores, err_msg)`` tuple.  ``scores`` is a list of
        ``(auc, marge_output_dir)`` pairs in the order they were read;
        ``err_msg`` holds one newline-terminated message per problem found.
    """
    import re

    number = re.compile(r"\d+\.?\d*")  # integer or float
    scores = []
    err_msg = ""
    for marge_output_dir in marge_dirs:
        for upload_file in files:
            filename, _ = os.path.splitext(os.path.basename(upload_file))
            regression_score_file = os.path.join(
                marge_output_dir,
                'margeoutput/regression/{}_target_regressionInfo.txt'.format(filename))
            if not os.path.exists(regression_score_file):
                err_msg += "File not exists: %s\n" % regression_score_file
                continue
            with open(regression_score_file, 'r') as fopen:
                for line in fopen:
                    if 'AUC = ' not in line:
                        continue
                    found = number.search(line)
                    if found is None:
                        # An AUC line without any digits: record it instead of
                        # failing with an IndexError.
                        err_msg += "No AUC value found in: %s\n" % regression_score_file
                        continue
                    scores.append((float(found.group()), marge_output_dir))
    return scores, err_msg


def _best_marge_dir(scores):
    """Return the directory paired with the highest AUC, or None if ``scores`` is empty.

    On ties the earliest entry wins, matching a first-match scan for the maximum.
    """
    best = None
    for score, marge_output_dir in scores:
        if best is None or score > best[0]:
            best = (score, marge_output_dir)
    return None if best is None else best[1]


def main(argv=None):
    """Run MARGE several times, keep the best-scoring run, then optionally run BART.

    Example: ``python exe_mb_pipeline.py 3 user_key True``

    Args:
        argv: Argument vector laid out like ``sys.argv`` (script name first,
            then repeat_times, user_key, bart_flag).  Defaults to ``sys.argv``.

    Raises:
        SystemExit: with a usage message when arguments are missing or
            repeat_times is not an integer.

    The helpers ``init_marge``, ``config_marge``, ``exe_marge``,
    ``exe_bart_geneset`` and the constant ``MARGE_CORE`` are not defined in
    this block; they are expected to be supplied by the module's
    ``from marge_bart import *``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(_USAGE)
    try:
        repeat_times = int(argv[1])
    except ValueError:
        sys.exit(_USAGE)
    user_key = argv[2]
    bart_flag = _parse_bool_flag(argv[3])

    # Kept as function-level imports, as in the original script.
    import do_process
    import multiprocessing

    user_data = do_process.get_user_data(user_key)
    user_path = user_data['user_path']
    files = user_data['files']
    marge_dirs = [os.path.join(user_path, 'marge_{}'.format(i))
                  for i in range(repeat_times)]

    err_msg = ""

    # Initialise and configure every MARGE working directory, then do a
    # snakemake dry run in it.  The dry-run output is discarded through
    # os.devnull: an unread stdout=PIPE can block the child once the pipe fills.
    with open(os.devnull, 'w') as devnull:
        for i, marge_output_dir in enumerate(marge_dirs):
            if init_marge(marge_output_dir):
                config_marge(user_data, marge_output_dir)
                subprocess.call(["snakemake", "-n"], stdout=devnull, cwd=marge_output_dir)
            else:
                err_msg += "Error in init marge NO.%d \n" % (i + 1)

    # Execute the MARGE runs in a worker pool.
    pool = multiprocessing.Pool(processes=MARGE_CORE)
    pending = [(marge_output_dir, pool.apply_async(exe_marge, args=(marge_output_dir, )))
               for marge_output_dir in marge_dirs]
    pool.close()
    pool.join()
    for marge_output_dir, result in pending:
        try:
            result.get()
        except Exception as exc:
            # get() re-raises whatever the worker raised; record it so a failed
            # run is reported instead of being dropped silently.
            err_msg += "Error in marge run %s: %s\n" % (marge_output_dir, exc)

    # Gather the AUC scores and promote the best run to <user_path>/marge_data.
    scores, auc_errors = _collect_auc_scores(marge_dirs, files)
    err_msg += auc_errors
    best_dir = _best_marge_dir(scores)
    if best_dir is None:
        err_msg += "Severe error in marge process!!\n"
    else:
        os.rename(best_dir, os.path.join(user_path, 'marge_data'))

    print(user_data)
    if bart_flag:
        exe_bart_geneset(user_data)

    print(err_msg)
# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()