trainers.py
import random
import timeit
import numpy as np
import os
from utils import chunks
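
# NOTE: `chunks` is imported from utils above. As used by the trainers below, it is
# assumed to split a sequence into consecutive slices of at most `size` elements,
# roughly like this sketch (illustrative only, not the repo's implementation):
#
#     def chunks(seq, size):
#         for i in range(0, len(seq), size):
#             yield seq[i:i + size]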


class OnlineTrainer:
    def train(self, model, train_set, loss, epochs, optimizer, show_progress=False, save_progress=False):
        """This is "online learning": we backpropagate and update the weights after forwarding one input sample at a time."""
        epochs_losses = []
        for epoch in xrange(epochs):
            if show_progress: print("Epoch %d/%d" % (epoch + 1, epochs))
            epoch_loss_sum = 0.
            for x, target in train_set:
                J, dJdy = self.train_one(model, x, target, loss, optimizer, show_progress=show_progress)
                epoch_loss_sum += J
            # mean loss over the training examples seen in this epoch
            epoch_mean_loss = epoch_loss_sum / float(len(train_set))
            epochs_losses.append(epoch_mean_loss)
        return epochs_losses

    def train_one(self, model, x, target, loss, optimizer, show_progress=False):
        y = model.forward(x, is_training=True)
        J = loss.calc_loss(y, target)
        dJdy = loss.calc_gradient(y, target)
        model.backward(dJdy)
        model.update_weights(optimizer)
        return J, dJdy
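
# Example usage sketch for OnlineTrainer (SequentialModel, CrossEntropyLoss and SGD are
# assumed to be the model/loss/optimizer classes provided elsewhere in this repo, as
# referenced in PatienceTrainer's docstring below; train_set is a list of (x, target) pairs):
#
#     trainer = OnlineTrainer()
#     losses = trainer.train(model, train_set,
#                            loss=CrossEntropyLoss(), epochs=10,
#                            optimizer=SGD(learning_rate=0.5), show_progress=True)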


class PlottableTrainer:
    def __init__(self):
        pass

    # def plot_errors_history(self):
    #     plt.figure()
    #     plt.title('Errors History (J)')
    #     plt.plot(xrange(len(self.errors_history)), self.errors_history, color='red')
    #     plt.ylim([0, 2])
    #     return plt

    # def plot_loss_gradient_history(self):
    #     plt.figure()
    #     plt.title('Loss Gradient History (dJ/dy)')
    #     plt.plot(xrange(len(self.loss_gradient_history)), self.loss_gradient_history, color='orange')
    #     plt.ylim([0, 2])
    #     return plt


class MinibatchTrainer:
    def train_minibatches(self, model, train_set,
                          batch_size, loss, epochs, optimizer, shuffle=True,
                          show_progress=False, save_progress=False):
        if batch_size is None:
            batch_size = len(train_set)

        if show_progress:
            print("Train Set size = %d, Batch size = %d" % (len(train_set), batch_size))

        # copy train_set locally to shuffle it
        data = list(train_set)

        epoch_mean_losses = []
        for epoch in xrange(epochs):
            if shuffle:
                random.shuffle(data)

            batch_mean_losses_sum = 0.
            n_batches = 0
            for batch in chunks(data, batch_size):
                batch_mean_loss = self.train_one_minibatch(model, batch, loss, optimizer)
                batch_mean_losses_sum += batch_mean_loss
                n_batches += 1

            # mean of the per-batch mean losses over this epoch's batches
            epoch_mean_loss = batch_mean_losses_sum / float(n_batches)
            epoch_mean_losses.append(epoch_mean_loss)

            if show_progress:
                print("Epoch %d/%d: Epoch Mean Loss = %f" % (epoch + 1, epochs, epoch_mean_loss))
        return epoch_mean_losses

    def train_one_minibatch(self, model, batch, loss, optimizer):
        batch_mean_loss, mean_delta = self.forward_and_backward_one_minibatch(model, batch, loss)
        model.update_weights(optimizer)
        return batch_mean_loss

    def forward_and_backward_one_minibatch(self, model, batch, loss):
        loss_sum = 0.
        delta_sum = 0.
        for x, target in batch:
            y = model.forward(x, is_training=True)
            J = loss.calc_loss(y, target)
            dJdy = loss.calc_gradient(y, target)
            model.backward(dJdy)
            loss_sum += J
            delta_sum += dJdy
        # averages over the samples in this minibatch
        n_samples = float(len(batch))
        mean_delta = delta_sum / n_samples
        mean_losses = loss_sum / n_samples
        batch_mean_loss = np.mean(mean_losses)
        return batch_mean_loss, mean_delta

    train = train_minibatches
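
# Example usage sketch for MinibatchTrainer (same assumptions as above; passing
# batch_size=None falls back to full-batch training over the whole train_set):
#
#     trainer = MinibatchTrainer()
#     epoch_losses = trainer.train_minibatches(model, train_set, batch_size=32,
#                                              loss=CrossEntropyLoss(), epochs=20,
#                                              optimizer=SGD(learning_rate=0.5),
#                                              shuffle=True, show_progress=True)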

# class RNNTrainer:
#     def train(self, rnn, seq_length, optimizer):
#         rnn.


class PatienceTrainer(MinibatchTrainer):
    """This minibatch trainer uses a validation set and a patience counter to decide when to stop.

    Inspired by this Theano tutorial: http://deeplearning.net/tutorial/mlp.html
    """

    def train(self, model,
              train_batches, valid_batch, test_batch,
              batch_size, loss, max_epochs, optimizer,
              test_score_function,
              patience=10000, patience_increase=2, improvement_threshold=0.995):
        """
        :param model: the SequentialModel instance to use
        :param train_batches: training set, a list of (x, target) pairs
        :param valid_batch: validation set
        :param test_batch: test set
        :param batch_size:
        :param loss: loss instance to use, for example: loss=CrossEntropyLoss()
        :param max_epochs:
        :param optimizer: optimizer instance to use, for example: optimizer=SGD(learning_rate=0.5)
        :param test_score_function: function scoring the model on the test set, called as test_score_function(model, test_batch)
        :param patience: look at this many examples regardless
        :param patience_increase: wait this much longer when a new best is found
        :param improvement_threshold: a relative improvement of this much is considered significant
        :return:
        """
        n_train_batches = len(train_batches) // batch_size

        # go through this many minibatches before checking the network on the validation set;
        # in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_valid_loss = np.inf
        best_iter = 0
        test_score = 0.

        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False
        while (epoch < max_epochs) and (not done_looping):
            epoch += 1
            for minibatch_index, minibatch in enumerate(list(chunks(train_batches, batch_size))):
                # iteration number counted over all epochs
                iter_i = (epoch - 1) * n_train_batches + minibatch_index

                train_batch_mean_loss = self.train_one_minibatch(model, minibatch, loss, optimizer)

                if (iter_i + 1) % validation_frequency == 0:
                    # compute the loss on the validation set (no weight update is applied)
                    mean_valid_loss, valid_delta = self.forward_and_backward_one_minibatch(model, valid_batch, loss)

                    print('epoch %i, minibatch %i/%i, validation error %f, last train batch err %f (patience %d)' %
                          (epoch, minibatch_index + 1, n_train_batches, mean_valid_loss, train_batch_mean_loss,
                           patience))

                    # if we got the best validation score until now
                    if mean_valid_loss < best_valid_loss:
                        # improve patience if loss improvement is good enough
                        if mean_valid_loss < best_valid_loss * improvement_threshold:
                            patience = max(patience, iter_i * patience_increase)

                        best_valid_loss = mean_valid_loss
                        best_iter = iter_i

                        # test it on the test set
                        test_score = test_score_function(model, test_batch)
                        print('\tepoch %i, minibatch %i/%i, test error of best model %f %%' %
                              (epoch, minibatch_index + 1, n_train_batches, test_score))

                if patience <= iter_i:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print('Optimization complete. Best validation score of %f %% '
              'obtained at iteration %i, with test performance %f %%' %
              (best_valid_loss, best_iter + 1, test_score))
        print('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
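
# Example usage sketch for PatienceTrainer (illustrative only; the test_score_function
# shown here is a hypothetical stand-in returning a classification error percentage,
# assuming one-hot targets and the same model/loss/optimizer names as above):
#
#     def test_score_function(model, test_batch):
#         errors = [int(np.argmax(model.forward(x)) != np.argmax(target))
#                   for x, target in test_batch]
#         return 100. * np.mean(errors)
#
#     trainer = PatienceTrainer()
#     trainer.train(model, train_batches, valid_batch, test_batch,
#                   batch_size=32, loss=CrossEntropyLoss(), max_epochs=100,
#                   optimizer=SGD(learning_rate=0.5),
#                   test_score_function=test_score_function)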