-
Notifications
You must be signed in to change notification settings - Fork 0
/
10CV.py
142 lines (121 loc) · 4.43 KB
/
10CV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pickle
import os
import random
from random import randint
from sklearn.model_selection import KFold
from random import shuffle
from tqdm import tqdm
from torch_geometric.data import Data
# 1. Load the data
def load_Smiles():
    """Read the second column of ``./CID_smiles.txt`` into a list.

    Every non-blank line is split on whitespace and its second field is
    collected, so the returned list follows the order of the file.
    Blank lines (for example a trailing empty line) are skipped.

    Returns:
        list[str]: the second whitespace-separated field of each non-blank
        line (presumably the SMILES string, going by the file name).

    Raises:
        FileNotFoundError: if ``./CID_smiles.txt`` does not exist.
        IndexError: if a non-blank line has fewer than two fields.
    """
    Datas = './CID_smiles.txt'
    Smiles = []
    # The context manager guarantees the file handle is closed.
    with open(Datas) as fileIn:
        for line in fileIn:
            lineArr = line.strip().split()
            if not lineArr:
                # Nothing to parse on an empty line.
                continue
            Smiles.append(lineArr[1])
    return Smiles
def neg(max_drug_id=554, max_interaction=1317):
    """Draw one random negative sample ``[drug1, drug2, interaction, 0]``.

    The two drug indices are drawn independently, so they may be equal,
    and the sample is not checked against any list of known positive pairs.

    Args:
        max_drug_id: inclusive upper bound for both drug indices.
        max_interaction: inclusive upper bound for the interaction index.

    Returns:
        list[int]: ``[drug1, drug2, interaction, 0]``; the trailing ``0`` is
        the label that marks the row as a negative sample.
    """
    # The draw order (drug1, drug2, interaction) is kept fixed so that a
    # seeded random generator yields reproducible samples.
    drug1 = randint(0, max_drug_id)
    drug2 = randint(0, max_drug_id)
    interaction = randint(0, max_interaction)
    return [drug1, drug2, interaction, 0]
def load_DDIs():
    """Load positive rows from ``./DDI.txt`` and interleave random negatives.

    Each non-blank line is parsed as whitespace-separated integers and gets
    the label ``1`` appended. Directly after every positive row one sample
    from ``neg()`` (label ``0``) is appended, so positives and negatives are
    balanced one-to-one. Blank lines are skipped so that they add neither a
    malformed row nor an extra negative.

    Returns:
        list[list[int]]: alternating positive and negative rows.
        Presumably each row is ``[drug1, drug2, interaction, label]`` — the
        column count of ``DDI.txt`` is not visible here; verify against the
        data file.

    Raises:
        FileNotFoundError: if ``./DDI.txt`` does not exist.
        ValueError: if a field is not an integer literal.
    """
    Datas = './DDI.txt'
    DDIs = []
    # The context manager guarantees the file handle is closed.
    with open(Datas) as fileIn:
        for line in fileIn:
            lineArr = line.strip().split()
            if not lineArr:
                continue
            positive = [int(field) for field in lineArr]
            positive.append(1)
            DDIs.append(positive)
            DDIs.append(neg())
    return DDIs
# NOTE(review): this module-level name is not read by any code visible in
# this file — confirm whether something else depends on it.
data_loader = 'train'
def _write_fold_file(filename, pair_ids, DDs, Smiles):
    """Write one text line per selected row of *DDs* to *filename*."""
    # Opened once, in write mode: a re-run replaces the file instead of
    # appending rows from a different shuffle to an older split.
    with open(filename, 'w') as f:
        for ids in pair_ids:
            DD = DDs[ids]
            f.write(f'{DD[0]} {DD[1]} {Smiles[DD[0]]} {Smiles[DD[1]]} {DD[2]} {DD[3]}\n')


def write_to_txt():
    """Create the 10-fold cross-validation text files.

    Loads the rows from ``load_DDIs()``, shuffles them, and splits them with
    ``KFold(10, shuffle=True, random_state=1)``. For fold ``k`` (1-based) the
    training rows go to ``train{k}.txt`` and the held-out rows to
    ``test{k}.txt`` in the current working directory. Each output line is
    ``id1 id2 smiles1 smiles2 field2 field3``, where the two SMILES strings
    are looked up in ``load_Smiles()`` by the first two row values.

    NOTE(review): the files are written to the current directory, while the
    ``load_train_DDI`` / ``load_test_DDI`` functions in this file read from
    ``./txt/`` — confirm the files are moved there between the two steps.

    Returns:
        int: always ``0``.
    """
    DDs = load_DDIs()
    shuffle(DDs)
    Smiles = load_Smiles()
    kf = KFold(10, shuffle=True, random_state=1)
    for i, (trian_id, test_id) in tqdm(enumerate(kf.split(DDs))):
        print(trian_id)
        i_str = str(i + 1)
        _write_fold_file('train' + i_str + '.txt', trian_id, DDs, Smiles)
        _write_fold_file('test' + i_str + '.txt', test_id, DDs, Smiles)
    return 0
# 3. Load the DDI data written above and convert it into trainable pkl files
def _read_DDI_index(path):
    """Parse a fold text file into ``[id1, id2, interaction, label]`` rows.

    Every non-blank line must hold six whitespace-separated fields:
    ``id1 id2 smiles1 smiles2 interaction label``. The two SMILES fields are
    discarded and the other four are converted to ``int``.
    """
    DDI_index = []
    # The context manager guarantees the file handle is closed.
    with open(path) as fileIn:
        for line in fileIn:
            fields = line.strip().split()
            if not fields:
                # Tolerate blank lines such as a trailing newline.
                continue
            id1, id2, _smiles1, _smiles2, interaction, label = fields
            # Stored as a real list: a ``map`` object could be consumed only once.
            DDI_index.append([int(id1), int(id2), int(interaction), int(label)])
    return DDI_index


def load_train_DDI(i):
    """Load the training rows of fold *i* from ``./txt/train{i}.txt``.

    Args:
        i: fold number substituted into the file name.

    Returns:
        list[list[int]]: one ``[id1, id2, interaction, label]`` row per
        non-blank line of the file.

    Raises:
        FileNotFoundError: if the fold file does not exist.
        ValueError: if a non-blank line does not have exactly six fields or
            a numeric field is not an integer literal.
    """
    return _read_DDI_index('./txt/train{}.txt'.format(i))


def load_test_DDI(i):
    """Load the test rows of fold *i* from ``./txt/test{i}.txt``.

    Args:
        i: fold number substituted into the file name.

    Returns:
        list[list[int]]: one ``[id1, id2, interaction, label]`` row per
        non-blank line of the file.

    Raises:
        FileNotFoundError: if the fold file does not exist.
        ValueError: if a non-blank line does not have exactly six fields or
            a numeric field is not an integer literal.
    """
    return _read_DDI_index('./txt/test{}.txt'.format(i))
def save_data(data, filename, dirname='./'):
    """Pickle *data* to ``dirname/filename`` and print a confirmation.

    Args:
        data: any picklable object.
        filename: name of the file to create inside *dirname*.
        dirname: target directory; created (including parents) when missing.
            Defaults to the current directory.

    Raises:
        OSError: if the directory cannot be created or the file cannot be
            written.
        pickle.PicklingError: if *data* cannot be pickled.
    """
    if dirname:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(dirname, exist_ok=True)
    filename = os.path.join(dirname, filename)
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
    print(f'\nData saved as {filename}!')
def _drug_graph(drugdata, drug):
    """Build a ``Data`` graph from the ``(x, edge_index, edge_attr)`` entry of *drug*."""
    x, edge_index, edge_feature = drugdata[drug]
    return Data(x=x, edge_index=edge_index, edge_attr=edge_feature)


def _build_drug_pairs(DDI_index, drugdata):
    """Map each ``(drug1, drug2, interaction, label)`` row to a dict of graphs.

    The result is keyed by the row's position in *DDI_index*.
    """
    Drug_pair = {}
    for index, (drug1, drug2, interaction, label) in enumerate(DDI_index):
        Drug_pair[index] = dict(
            drug_1=_drug_graph(drugdata, drug1),
            drug_2=_drug_graph(drugdata, drug2),
            Inter=interaction,
            Label=label,
        )
    return Drug_pair


def load_DD_data():
    """Convert the ten fold text files into pickled drug-pair dictionaries.

    Reads ``./drug_data.pkl``, whose entries are indexed by drug id and each
    unpack into three values used as ``x``, ``edge_index`` and ``edge_attr``
    of a ``Data`` object. For every fold ``i`` in 1..10 the rows from
    ``load_train_DDI(i)`` and ``load_test_DDI(i)`` are converted and stored
    through ``save_data`` as ``train{i}.pkl`` and ``test{i}.pkl``.

    NOTE: ``pickle.load`` can execute arbitrary code contained in the file;
    only run this on a ``drug_data.pkl`` from a trusted source.

    Returns:
        dict: the drug-pair dictionary of the last fold's test split.
    """
    with open('./drug_data.pkl', 'rb') as f:
        drugdata = pickle.load(f)
    for i in range(1, 11):
        train_pairs = _build_drug_pairs(load_train_DDI(i), drugdata)
        save_data(train_pairs, 'train{}.pkl'.format(i))
        Drug_pair = _build_drug_pairs(load_test_DDI(i), drugdata)
        save_data(Drug_pair, 'test{}.pkl'.format(i))
    return Drug_pair
# Script entry: runs at module load. write_to_txt() generates the per-fold
# train/test text files; the commented-out load_DD_data() call is the later
# step that converts those text files into .pkl files.
# a = load_DD_data()
a = write_to_txt()