-
Notifications
You must be signed in to change notification settings - Fork 126
/
layers.py
132 lines (106 loc) · 4.01 KB
/
layers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
Layers for multimodal-ranking
"""
import theano
import theano.tensor as tensor
import numpy
from utils import _p, ortho_weight, norm_weight, xavier_weight, tanh, linear
# Layer registry. Each key is a layer name; each value is the pair
# (name of the parameter initializer, name of the feedforward function).
layers = {
    'ff': ('param_init_fflayer', 'fflayer'),
    'gru': ('param_init_gru', 'gru_layer'),
}
def get_layer(name):
    """
    Return param init and feedforward functions for the given layer name

    name: key of the module-level `layers` registry

    Returns a tuple (param_init_fn, feedforward_fn): the module-level
    objects whose names are registered under `name`.

    Raises KeyError when `name` is not registered, or when a registered
    function name is not defined at module level.
    """
    fns = layers[name]
    # Resolve the registered names in the module namespace instead of
    # eval()-ing them: identical result for plain identifiers, without
    # executing the strings as code.
    module_scope = globals()
    return (module_scope[fns[0]], module_scope[fns[1]])
# Feedforward layer
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity

    Adds the weight and bias of one feedforward layer to `params`.

    options: model options; options['dim_proj'] is the fallback size
    params: parameter mapping, updated in place
    prefix: layer name, combined with 'W' / 'b' through `_p` to form the keys
    nin: input size, defaults to options['dim_proj']
    nout: output size, defaults to options['dim_proj']
    ortho: not used by this initializer

    Returns `params`.
    """
    # Identity test: `== None` would go through the argument's __eq__.
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']

    # Weight comes from xavier_weight(nin, nout); bias is a float32 zero
    # vector of length nout.
    params[_p(prefix, 'W')] = xavier_weight(nin, nout)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Feedforward pass

    Computes activ(dot(state_below, W) + b), with W and b read from
    `tparams` under the keys _p(prefix, 'W') and _p(prefix, 'b').

    tparams: mapping of layer parameters
    state_below: input to the layer
    options: not used here
    prefix: layer name used to build the parameter keys
    activ: the nonlinearity; either a callable, or a string holding a
        Python expression that evaluates to one (the original form)
    kwargs: accepted and ignored

    Returns the activated output.
    """
    # A string activation is evaluated first and directly in this function,
    # so it resolves names exactly as before (module globals such as
    # `tensor`, plus this function's arguments).
    # NOTE(review): eval() executes arbitrary code -- `activ` strings must
    # only come from trusted configuration.
    activation = activ if callable(activ) else eval(activ)
    preact = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    return activation(preact)
# GRU layer
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)

    Adds the GRU parameters to `params` under keys built with `_p`:
      W, b   -- input weights and bias for the two gates
      U      -- recurrent weights for the two gates
      Wx, bx -- input weights and bias for the candidate state
      Ux     -- recurrent weights for the candidate state

    options: model options; options['dim_proj'] is the fallback size
    params: parameter mapping, updated in place
    prefix: layer name used to build the parameter keys
    nin: input size, defaults to options['dim_proj']
    dim: hidden size, defaults to options['dim_proj']

    Returns `params`.
    """
    # Identity test: `== None` would go through the argument's __eq__.
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # The two gate matrices are stored side by side along axis 1;
    # gru_layer slices chunk 0 as the reset gate and chunk 1 as the
    # update gate.
    params[_p(prefix, 'W')] = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    params[_p(prefix, 'U')] = numpy.concatenate(
        [ortho_weight(dim), ortho_weight(dim)], axis=1)

    # Candidate-state parameters.
    params[_p(prefix, 'Wx')] = norm_weight(nin, dim)
    params[_p(prefix, 'Ux')] = ortho_weight(dim)
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def gru_layer(tparams, state_below, init_state, options, prefix='gru', mask=None, one_step=False, **kwargs):
    """
    Feedforward pass through GRU

    tparams: mapping holding the layer's W, b, U, Wx, bx and Ux entries
        under keys built with _p(prefix, ...)
    state_below: input; axis 0 is taken as the number of steps and, when
        the input is 3-dimensional, axis 1 as the number of samples
    init_state: initial hidden state, or None for zeros of shape
        (n_samples, dim)
    options: not used here
    prefix: layer name used to build the parameter keys
    mask: per-step mask, or None for ones of shape (nsteps, 1)
    one_step: if True apply the step function once instead of scanning
    kwargs: accepted and ignored

    Returns a one-element list holding the hidden state(s).

    NOTE(review): with one_step=True the inputs are handed to the step
    function unsliced, so callers presumably pass a single time step and
    an explicit mask / init_state -- confirm against callers.
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    U = tparams[_p(prefix, 'U')]
    Ux = tparams[_p(prefix, 'Ux')]
    dim = Ux.shape[1]

    # Identity tests: the arguments may be symbolic variables, for which
    # `== None` is not a reliable "was it supplied" check.
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)
    if mask is None:
        mask = tensor.alloc(1., nsteps, 1)

    def _slice(_x, n, dim):
        # n-th chunk of width `dim` along the last axis.
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # Input projections do not depend on the hidden state, so they are
    # computed for all steps at once, outside the recurrence.
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        # Gate pre-activations: recurrent part plus projected input.
        preact = tensor.dot(h_, U) + x_
        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        # Candidate state: the reset gate scales the recurrent term only.
        preactx = tensor.dot(h_, Ux) * r + xx_
        h = tensor.tanh(preactx)

        # Blend previous state and candidate with the update gate.
        h = u * h_ + (1. - u) * h
        # Where the mask is 0 the previous state is carried over unchanged.
        h = m_[:, None] * h + (1. - m_)[:, None] * h_
        return h

    seqs = [mask, state_below_, state_belowx]

    if one_step:
        rval = _step_slice(*(seqs + [init_state, U, Ux]))
    else:
        # The updates returned by scan are not used.
        rval, _ = theano.scan(_step_slice,
                              sequences=seqs,
                              outputs_info=[init_state],
                              non_sequences=[U, Ux],
                              name=_p(prefix, '_layers'),
                              n_steps=nsteps,
                              profile=False,
                              strict=True)
    rval = [rval]
    return rval