-
Notifications
You must be signed in to change notification settings - Fork 3
/
featureselection.py
48 lines (36 loc) · 1.11 KB
/
featureselection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
import copy
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_regression
def rewrite(myfile, max_lines=5001):
    """Copy the leading lines of ``myfile`` into ``myfile + ".revised"``.

    Every copied line has its leading and trailing whitespace stripped and
    is terminated with exactly one newline, so a final line without a
    trailing newline still ends with one in the output.

    Args:
        myfile: Path of the text file to read. The output is written to the
            same path with ``.revised`` appended.
        max_lines: Maximum number of lines to copy. The default of 5001
            keeps the historical cut-off of this function. ``None`` copies
            the whole file.

    Raises:
        OSError: If the input cannot be read or the output cannot be
            written. The input is opened first, so a missing input does not
            leave an empty ``.revised`` file behind.
    """
    from itertools import islice

    # Context managers guarantee both handles are closed even when a read
    # or write fails part-way through.
    with open(myfile, "r") as source, open(myfile + ".revised", "w") as target:
        target.writelines(
            line.strip() + "\n" for line in islice(source, max_lines)
        )
def getY():
    """Return column 72 of the ``train`` file as a column vector.

    The comma-delimited file named ``train`` is read from the current
    working directory with ``np.loadtxt``. The values found at column
    index 72 are returned reshaped to ``(n_rows, 1)``.
    """
    table = np.loadtxt("train", delimiter=",")
    labels = table[:, 72]
    # Column-vector layout: one row per sample, a single column.
    return labels.reshape(-1, 1)
def read_and_normalize_data():
    """Load the leading feature columns from the truncated vectors file.

    ``rewrite`` is first called on ``train_text.vectors`` to produce
    ``train_text.vectors.revised``. That space-delimited file is then
    loaded with ``np.loadtxt`` and columns 0 through 4095 are returned.

    NOTE(review): despite the function name, no normalization is applied
    here - the values are returned exactly as loaded.
    """
    # Produce the whitespace-stripped, truncated copy that loadtxt reads.
    rewrite("train_text.vectors")
    vectors = np.loadtxt("train_text.vectors.revised", delimiter=" ")
    return vectors[:, :4096]
def writeFile(mask, path="mask"):
    """Write each entry of ``mask`` on its own line of a text file.

    Args:
        mask: Iterable of values (typically booleans). Each one is written
            as ``str(value)`` followed by a newline.
        path: Destination file. Defaults to ``"mask"`` in the current
            working directory. Any existing file is overwritten.

    Raises:
        OSError: If the destination cannot be opened or written.
    """
    # The context manager closes the file even if a write fails. The loop
    # variable is deliberately not called ``bool`` so the builtin is not
    # shadowed.
    with open(path, "w") as out:
        out.writelines(str(flag) + "\n" for flag in mask)
# Script body: pick the 72 best-scoring feature columns and save the mask.

# Load the feature matrix, then trim the labels to the same number of rows.
x_train_subarr = read_and_normalize_data()
y_train_subarr = getY()[:x_train_subarr.shape[0]]

# Score every feature column with mutual_info_regression and keep 72.
b = SelectKBest(mutual_info_regression, k=72)
X_new = b.fit_transform(x_train_subarr, y_train_subarr)

# Persist the boolean support mask (one entry per input feature column).
mask = b.get_support()
writeFile(mask)