forked from VivekPa/AIAlpha
-
Notifications
You must be signed in to change notification settings - Fork 1
/
autoencoder.py
63 lines (47 loc) · 2.53 KB
/
autoencoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
from keras import regularizers
import pandas as pd
import numpy as np
class AutoEncoder:
    """Dense autoencoder that compresses feature rows to ``encoding_dim`` values.

    ``build_train_model`` builds the network, trains it on the preprocessed
    training CSV, saves the encoder half, reports the reconstruction loss on
    the test CSV and writes the encoded form of the log-train CSV to disk.
    """

    def __init__(self, encoding_dim):
        """Store the width of the bottleneck (innermost encoded) layer."""
        self.encoding_dim = encoding_dim

    @staticmethod
    def _load_samples(csv_path, n_features):
        """Read a CSV (first column is the index) as an array of shape (rows, 1, n_features)."""
        values = np.array(pd.read_csv(csv_path, index_col=0))
        return np.reshape(values, (len(values), 1, n_features))

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will hold ``file_path`` if it is missing."""
        import os  # local import: the module header does not import os

        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def build_train_model(self, input_shape, encoded1_shape, encoded2_shape, decoded1_shape, decoded2_shape):
        """Build, train, evaluate and apply the autoencoder.

        Args:
            input_shape: Number of features per sample; every CSV read here
                must have exactly this many value columns.
            encoded1_shape: Units in the first encoder layer.
            encoded2_shape: Units in the second encoder layer.
            decoded1_shape: Units in the first decoder layer.
            decoded2_shape: Units in the second decoder layer.

        Side effects:
            Reads ``preprocessing/rbm_train.csv``, ``preprocessing/rbm_test.csv``
            and ``preprocessing/log_train.csv``; writes ``models/encoder.h5``
            and ``features/autoencoded_data.csv``; prints the test loss.
        """

        def dense(units, activation="relu"):
            # NOTE: the L2 activity-regularization factor is 0, so it
            # currently adds no penalty to the loss.
            return Dense(units, activation=activation, activity_regularizer=regularizers.l2(0))

        input_data = Input(shape=(1, input_shape))
        encoded1 = dense(encoded1_shape)(input_data)
        encoded2 = dense(encoded2_shape)(encoded1)
        encoded3 = dense(self.encoding_dim)(encoded2)
        decoded1 = dense(decoded1_shape)(encoded3)
        decoded2 = dense(decoded2_shape)(decoded1)
        # Sigmoid output: assumes the input features are scaled to [0, 1]
        # -- TODO confirm against the preprocessing step.
        decoded = dense(input_shape, activation="sigmoid")(decoded2)

        autoencoder = Model(inputs=input_data, outputs=decoded)
        # The encoder shares its layers (and therefore its trained weights)
        # with the full autoencoder.
        encoder = Model(input_data, encoded3)

        # Train the network to reproduce its own (already preprocessed) input.
        autoencoder.compile(loss="mean_squared_error", optimizer="adam")
        train_data = self._load_samples("preprocessing/rbm_train.csv", input_shape)
        autoencoder.fit(train_data, train_data, epochs=1000)

        # Create the output directory first so a missing folder cannot
        # throw away the result of a long training run.
        self._ensure_parent_dir("models/encoder.h5")
        encoder.save("models/encoder.h5")

        # Use input_shape rather than a fixed 55 so other feature counts work.
        test_data = self._load_samples("preprocessing/rbm_test.csv", input_shape)
        print(autoencoder.evaluate(test_data, test_data))

        # Encode every log-train row in one batched predict call instead of
        # one call per row.
        log_data = self._load_samples("preprocessing/log_train.csv", input_shape)
        if len(log_data):
            coded = encoder.predict(log_data)
            coded_rows = np.reshape(coded, (len(coded), self.encoding_dim))
        else:
            # predict() cannot run on zero samples; emit an empty table.
            coded_rows = np.empty((0, self.encoding_dim))

        train_coded = pd.DataFrame(coded_rows)
        self._ensure_parent_dir("features/autoencoded_data.csv")
        train_coded.to_csv("features/autoencoded_data.csv")
if __name__ == "__main__":
    # Script entry point: train a 55 -> 40 -> 30 -> 20 -> 30 -> 40 -> 55
    # autoencoder with a 20-unit bottleneck.
    AutoEncoder(20).build_train_model(
        input_shape=55,
        encoded1_shape=40,
        encoded2_shape=30,
        decoded1_shape=30,
        decoded2_shape=40,
    )