mlp.py
import numpy as np
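# A from-scratch multi-layer perceptron (MLP) trained with mini-batch gradient
# descent (batch size 128). Layer sizes, activation functions (sigmoid, tanh,
# ReLU, Leaky ReLU), learning rate, and loss function (MSE or cross-entropy)
# are configurable through the constructor; training and test data are expected
# as pandas DataFrames, since batches are indexed with .iloc.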

class MLP:
    def __init__(self, number_of_layers, number_of_neurons, activation_functions, learning_rate, loss_function):
        self.number_of_layers = number_of_layers
        self.number_of_neurons = number_of_neurons
        self.activation_functions = activation_functions
        self.learning_rate = learning_rate
        self.loss_function = loss_function

    def fit_and_transform(self, data_original, test_data, outcome, epochs):
        self.data_original = data_original
        self.outcome = outcome
        self.epochs = epochs
        self.weights = [None] * self.number_of_layers
        self.biases = [None] * self.number_of_layers
        # initialize the input-layer weights and biases with random numbers in [0, 1)
        self.weights[0] = np.random.rand(self.number_of_neurons[0], data_original.shape[1])
        self.biases[0] = np.random.rand(self.number_of_neurons[0], 1)
        # initialize the hidden-layer and output-layer weights and biases
        for i in range(1, self.number_of_layers):
            self.weights[i] = np.random.rand(self.number_of_neurons[i], self.number_of_neurons[i - 1])
            self.biases[i] = np.random.rand(self.number_of_neurons[i], 1)
        # pre-allocate the per-layer pre-activation (z) and activation (a) lists
        z = [None] * self.number_of_layers
        a = [None] * self.number_of_layers
        # loop over epochs
        for j in range(self.epochs):
            # mini-batch learning is used here, with a batch size of 128
            for k in range(0, data_original.shape[0] - 128, 128):
                # the running value starts as the transposed input batch (features x 128)
                value = (data_original.iloc[k:k + 128, :]).T
                # forward pass
                for i in range(self.number_of_layers):
                    # z = W . a_prev + b
                    z[i] = np.dot(self.weights[i], value) + self.biases[i]
                    # apply the layer's activation function
                    if self.activation_functions[i] == 'sigmoid':
                        value = self.sigmoid(z[i])
                    elif self.activation_functions[i] == 'tanh':
                        value = self.tanh(z[i])
                    elif self.activation_functions[i] == 'ReLU':
                        value = self.ReLU(z[i])
                    elif self.activation_functions[i] == 'Leaky_ReLU':
                        value = self.Leaky_ReLU(z[i])
                    # store the activation for back-propagation
                    a[i] = value
                # back-propagation
                # pre-allocate the per-layer gradients of the biases and weights
                delta_bias = [None] * self.number_of_layers
                delta_weights = [None] * self.number_of_layers
                # the output layer is handled first; the loss function determines
                # how its delta is computed
                if self.loss_function == 'MSE':
                    # MSE: delta = (a - y) * sigma'(z)  (this branch assumes a sigmoid output layer)
                    loss = a[-1] - self.outcome[k:k + 128]
                    delta_final = np.array(loss) * np.array(self.dsigmoid(z[-1]))
                    delta_bias[-1] = delta_final
                    if len(a) != 1:
                        # more than one layer: use the previous layer's activation
                        delta_weights[-1] = np.dot(delta_final, a[-2].T)
                    else:
                        # single-layer network: use the input batch directly
                        delta_weights[-1] = np.dot(delta_final, self.data_original.iloc[k:k + 128, :])
                elif self.loss_function == 'cross_entropy':
                    # cross-entropy with a sigmoid output simplifies to delta = a - y
                    loss = a[-1] - self.outcome[k:k + 128]
                    delta_final = loss
                    delta_bias[-1] = delta_final
                    if len(a) != 1:
                        delta_weights[-1] = np.dot(delta_final, a[-2].T)
                    else:
                        delta_weights[-1] = np.dot(delta_final, self.data_original.iloc[k:k + 128, :])
                delta = delta_final
                # propagate the delta backwards through the remaining layers;
                # note that ~i == -(i + 1), so index ~i counts layers from the end
                for i in range(1, self.number_of_layers):
                    # delta = (W_next.T . delta_next) * f'(z) for this layer's activation f
                    if self.activation_functions[~i] == 'sigmoid':
                        delta = np.array(np.dot(self.weights[~i + 1].T, delta)) * np.array(self.dsigmoid(z[~i]))
                    elif self.activation_functions[~i] == 'tanh':
                        delta = np.array(np.dot(self.weights[~i + 1].T, delta)) * np.array(self.dtanh(z[~i]))
                    elif self.activation_functions[~i] == 'ReLU':
                        delta = np.array(np.dot(self.weights[~i + 1].T, delta)) * np.array(self.dReLU(z[~i]))
                    elif self.activation_functions[~i] == 'Leaky_ReLU':
                        delta = np.array(np.dot(self.weights[~i + 1].T, delta)) * np.array(self.dLeaky_ReLU(z[~i]))
                    # the bias gradient is the delta itself
                    delta_bias[~i] = delta
                    # the weight gradient is the delta times the previous layer's activation
                    if i != self.number_of_layers - 1:
                        delta_weights[~i] = np.dot(delta, a[~i - 1].T)
                    else:
                        # for the first layer, the "previous activation" is the input batch
                        delta_weights[~i] = np.dot(delta, self.data_original.iloc[k:k + 128, :])
                # finally, update the weights and biases; the accumulated gradients are
                # divided by the batch size of 128 to average them over the batch
                for i in range(self.number_of_layers):
                    self.weights[i] = self.weights[i] - self.learning_rate * delta_weights[i] / 128
                    # sum the per-sample bias deltas over the batch before averaging
                    self.biases[i] = self.biases[i] - self.learning_rate * np.sum(delta_bias[i], axis=1, keepdims=True) / 128
        # after training, run the forward pass over the test data with the same batch size
        result = []
        for k in range(0, test_data.shape[0] - 128, 128):
            value = test_data.iloc[k:k + 128, :].T
            for i in range(self.number_of_layers):
                z[i] = np.dot(self.weights[i], value) + self.biases[i]
                if self.activation_functions[i] == 'sigmoid':
                    value = self.sigmoid(z[i])
                elif self.activation_functions[i] == 'tanh':
                    value = self.tanh(z[i])
                elif self.activation_functions[i] == 'ReLU':
                    value = self.ReLU(z[i])
                elif self.activation_functions[i] == 'Leaky_ReLU':
                    value = self.Leaky_ReLU(z[i])
                a[i] = value
            # stack this batch's predictions onto the running result
            # (the reshape assumes a single output neuron)
            result = np.vstack((np.array(result).reshape(-1, 1), np.array(a[-1].T)))
        return result

    def sigmoid(self, z):
        # clip z to avoid overflow in np.exp for large-magnitude inputs
        z = np.clip(z, -500, 500)
        return 1.0 / (1.0 + np.exp(-z))

    def tanh(self, z):
        return np.tanh(z)

    def ReLU(self, z):
        return np.maximum(z, 0)

    def Leaky_ReLU(self, z):
        alpha = 0.01
        return np.maximum(z, z * alpha)

    def dsigmoid(self, z):
        # derivative of the sigmoid: sigma(z) * (1 - sigma(z))
        s = self.sigmoid(z)
        return s * (1 - s)

    def dtanh(self, z):
        # derivative of tanh: 1 - tanh(z)^2
        return 1 - np.tanh(z) ** 2

    def dReLU(self, z):
        # derivative of ReLU, computed without mutating the cached z values
        return (z >= 0).astype(float)

    def dLeaky_ReLU(self, z):
        # derivative of Leaky ReLU, computed without mutating the cached z values
        return np.where(z >= 0, 1.0, 0.01)
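

# A minimal usage sketch: a hypothetical example of how this class might be
# driven, assuming pandas DataFrames for the features and a NumPy array for the
# targets. The synthetic data, layer sizes, and hyperparameters below are
# illustrative values only, not part of any particular experiment.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.default_rng(0)
    # 512 training rows and 256 test rows, each with 4 features
    train_df = pd.DataFrame(rng.random((512, 4)))
    test_df = pd.DataFrame(rng.random((256, 4)))
    # binary targets for the training rows
    targets = rng.integers(0, 2, size=512)

    # two layers: an 8-neuron sigmoid hidden layer and a 1-neuron sigmoid output
    model = MLP(
        number_of_layers=2,
        number_of_neurons=[8, 1],
        activation_functions=['sigmoid', 'sigmoid'],
        learning_rate=0.1,
        loss_function='cross_entropy',
    )
    predictions = model.fit_and_transform(train_df, test_df, targets, epochs=5)
    # predictions are stacked per test batch, one column per output neuron
    print(predictions.shape)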