#!/usr/bin/env python
"""
Filename: rnn.py
Author: Denny Britz, Emily Daniels
Date: May 2016
Purpose: Trains the model on the text specified. This class is a
re-implementation of an RNN Theano example created by Denny Britz:
https://github.com/dennybritz/rnn-tutorial-rnnlm
"""
import numpy as np
import theano
import theano.tensor as T
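

# A vanilla RNN language model. At each step t the network reads a word
# index x_t and updates its hidden state and output distribution:
#     s_t = tanh(U[:, x_t] + W . s_{t-1})
#     o_t = softmax(V . s_t)
# Indexing column x_t of U is equivalent to multiplying U by a one-hot
# encoding of the word, which is why no explicit one-hot vectors appear below.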
class RNN(object):

    def __init__(self, word_dim, hidden_dim, bptt_truncate):
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        # initialize parameters uniformly in [-1/sqrt(n), 1/sqrt(n)],
        # where n is the number of incoming connections to each unit
        U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim),
                              (hidden_dim, word_dim))
        V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim),
                              (word_dim, hidden_dim))
        W = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim),
                              (hidden_dim, hidden_dim))
        # store as shared variables so Theano can update them in place
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))
        self.theano = {}
        self.__theano_build__()
    def __theano_build__(self):
        U, V, W = self.U, self.V, self.W
        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop_step(x_t, s_t_prev, U, V, W):
            # U[:, x_t] selects the column for word x_t, equivalent to
            # multiplying U by a one-hot word vector
            s_t = T.tanh(U[:, x_t] + W.dot(s_t_prev))
            o_t = T.nnet.softmax(V.dot(s_t))
            return [o_t[0], s_t]

        # iterate forward_prop_step over the sequence, starting from a zero
        # hidden state; truncate_gradient limits how far BPTT unrolls
        [o, s], updates = theano.scan(
            forward_prop_step,
            sequences=x,
            outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, V, W],
            truncate_gradient=self.bptt_truncate,
            strict=True)
        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
        # gradients
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW = T.grad(o_error, W)
        # assign functions
        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW])
        # SGD
        learning_rate = T.scalar('learning_rate')
        self.sgd_step = theano.function([x, y, learning_rate], [],
            updates=[(self.U, self.U - learning_rate * dU),
                     (self.V, self.V - learning_rate * dV),
                     (self.W, self.W - learning_rate * dW)])
    def calculate_total_loss(self, X, Y):
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calculate_loss(self, X, Y):
        # divide the total loss by the number of words for a per-word average
        num_words = np.sum([len(y) for y in Y])
        return self.calculate_total_loss(X, Y) / float(num_words)


def softmax(x):
    # subtract the max before exponentiating for numerical stability
    xt = np.exp(x - np.max(x))
    return xt / np.sum(xt)
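

# Minimal usage sketch (not part of the original file): the dimensions, toy
# sequences, and learning rate below are illustrative assumptions; in the
# full project, x and y come from tokenized text indexed against a vocabulary.
if __name__ == '__main__':
    np.random.seed(10)
    model = RNN(word_dim=100, hidden_dim=10, bptt_truncate=4)
    x_example = np.asarray([0, 1, 2, 3], dtype='int32')  # toy word indices
    y_example = np.asarray([1, 2, 3, 4], dtype='int32')  # input shifted by one
    print("Loss before SGD step: %f" %
          model.calculate_loss([x_example], [y_example]))
    model.sgd_step(x_example, y_example, 0.005)
    print("Loss after SGD step: %f" %
          model.calculate_loss([x_example], [y_example]))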