// *********************** Back-Prop Through Time ***************************
// Try to learn input-output pairs with flexible iteration
// As a first step we unfold only once (to learn a 2-step operation).
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <assert.h>
#include <time.h> // time as random seed in create_NN()
#include "BPTT-RNN.h"
extern double rectifier(double);
extern double randomWeight();
#define Eta 0.01 // learning rate
#define BIASOUTPUT 1.0 // output for bias. It's always 1.
//************************ create neural network *********************//
// GIVEN: how many layers, and how many neurons in each layer
RNN *create_BPTT_NN(int numLayers, int *neuronsPerLayer)
{
RNN *net = (RNN *) malloc(sizeof(RNN));
srand(time(NULL));
net->numLayers = numLayers;
assert(numLayers >= 3);
net->layers = (rLAYER *) malloc(numLayers * sizeof (rLAYER));
//construct input layer, no weights
net->layers[0].numNeurons = neuronsPerLayer[0];
net->layers[0].neurons = (rNEURON *) malloc(neuronsPerLayer[0] * sizeof (rNEURON));
//construct hidden layers
for (int l = 1; l < numLayers; l++) //construct layers
{
// This takes care of the n folds of outputs and gradients:
net->layers[l].neurons = (rNEURON *) malloc(neuronsPerLayer[l] * sizeof (rNEURON));
net->layers[l].numNeurons = neuronsPerLayer[l];
for (int n = 0; n < neuronsPerLayer[l]; n++) // construct each neuron in the layer
{
// Only 1 array of weights per neuron, because weights are shared across folds
net->layers[l].neurons[n].weights =
(double *) malloc((neuronsPerLayer[l - 1] + 1) * sizeof (double));
for (int i = 0; i <= neuronsPerLayer[l - 1]; i++)
{
// construct weights of this neuron from previous-layer neurons
// when i == 0, it is the bias weight
net->layers[l].neurons[n].weights[i] = randomWeight();
//net->layers[l].neurons[n].weights[i] = 0.0f;
}
}
}
return net;
}
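// randomWeight() is supplied by another file in this project. Below is a
// minimal sketch of one plausible definition (an assumption for illustration,
// not the project's actual initializer): small symmetric values so that
// neurons start out neither saturated nor identical.
#if 0 // illustrative only -- the real randomWeight() is defined elsewhere
double randomWeight()
{
// uniform in [-1, +1]; relies on srand() having been called in create_BPTT_NN()
return 2.0 * ((double) rand() / (double) RAND_MAX) - 1.0;
}
#endif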
void BPTT_re_randomize(RNN *net, int numLayers, int *neuronsPerLayer)
{
srand(time(NULL));
for (int l = 1; l < numLayers; ++l) // for each layer
for (int n = 0; n < neuronsPerLayer[l]; ++n) // for each neuron
for (int i = 0; i <= neuronsPerLayer[l - 1]; ++i) // for each weight
{
net->layers[l].neurons[n].weights[i] = randomWeight();
}
}
void free_BPTT_NN(RNN *net, int *neuronsPerLayer)
{
// for input layer
free(net->layers[0].neurons);
// for each hidden layer
int numLayers = net->numLayers;
for (int l = 1; l < numLayers; l++) // for each layer
{
for (int n = 0; n < neuronsPerLayer[l]; n++) // for each neuron in the layer
{
free(net->layers[l].neurons[n].weights);
}
free(net->layers[l].neurons);
}
// free all layers
free(net->layers);
// free the whole net
free(net);
}
//**************************** forward-propagation ***************************//
// Propagate through the *unfolded* network nfold times,
// recording all activities (outputs) at every time step.
void forward_BPTT(RNN *net, int dim_V, double V[], int nfold)
{
int numLayers = net->numLayers;
rLAYER lastLayer = net->layers[numLayers - 1];
for (int t = 0; t < nfold; ++t) // for each unfolding...
{
//set the output of input layer
for (int k = 0; k < dim_V; ++k)
net->layers[0].neurons[k].output[t] =
(t == 0 ?
V[k] :
// feed the last layer's output back as the input
// (assumes the last layer has at least dim_V neurons)
lastLayer.neurons[k].output[t - 1]);
//calculate output from hidden layers to output layer
for (int l = 1; l < numLayers; l++)
{
for (int n = 0; n < net->layers[l].numNeurons; n++)
{
double v = 0.0; // induced local field of this neuron
// v is the weighted sum of the previous layer's outputs, plus the bias
for (int k = 0; k <= net->layers[l - 1].numNeurons; k++)
{
if (k == 0)
v += net->layers[l].neurons[n].weights[k] * BIASOUTPUT;
else
v += net->layers[l].neurons[n].weights[k] *
net->layers[l - 1].neurons[k - 1].output[t];
}
net->layers[l].neurons[n].output[t] = rectifier(v);
// Store the rectifier's derivative σ'(v) for back-prop (leaky: Leakage below 0, 1 otherwise)
#define Leakage 0.1
if (v < 0.0)
net->layers[l].neurons[n].grad[t] = Leakage;
// if (v > 1.0)
// net->layers[l].neurons[n].grad[t] = Leakage;
else
net->layers[l].neurons[n].grad[t] = 1.0;
}
}
}
}
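// rectifier() is likewise defined elsewhere. Judging from the gradients stored
// above (Leakage for v < 0, 1.0 otherwise), it is presumably a leaky rectifier;
// the sketch below just makes that assumption explicit and is not necessarily
// the project's actual activation function.
#if 0 // illustrative only
double rectifier(double v)
{
// leaky ReLU with the same leak slope used for the stored gradients
return (v < 0.0) ? Leakage * v : v;
}
#endif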
//*************************** Back-Prop Through Time ***************************//
void backprop_through_time(RNN *net, double *errors, int nfold)
{
int numLayers = net->numLayers;
rLAYER lastLayer = net->layers[numLayers - 1];
for (int t = nfold - 1; t >= 0; --t) // back-prop through time...
{
if (t == nfold - 1)
// calculate ∇ for output layer
for (int n = 0; n < lastLayer.numNeurons; ++n)
{
// double output = lastLayer.neurons[n].output[t];
//for the output layer, ∇ = σ'(v)∙error
lastLayer.neurons[n].grad[t] *= errors[n];
}
else
// for earlier folds: the output at time t is fed back as the input
// at time t+1, so its gradient flows back from layer 1 at time t+1
for (int n = 0; n < lastLayer.numNeurons; ++n)
{
// double output = lastLayer.neurons[n].output[t];
double sum = 0.0;
rLAYER nextLayer = net->layers[1];
for (int i = 0; i < nextLayer.numNeurons; i++) // for each neuron in layer 1
{
sum += nextLayer.neurons[i].weights[n + 1] // weights[0] is the bias weight
* nextLayer.neurons[i].grad[t + 1];
}
lastLayer.neurons[n].grad[t] *= sum;
}
// calculate ∇ for hidden layers
for (int l = numLayers - 2; l > 0; --l) // for each hidden layer (except layer 0 has no weights)
{
for (int n = 0; n < net->layers[l].numNeurons; n++) // for each neuron in layer
{
// double output = net->layers[l].neurons[n].output[t];
double sum = 0.0f;
rLAYER nextLayer = net->layers[l + 1];
for (int i = 0; i < nextLayer.numNeurons; i++) // for each weight
{
sum += nextLayer.neurons[i].weights[n + 1] // weights[0] is the bias weight, hence the +1 shift
* nextLayer.neurons[i].grad[t];
}
net->layers[l].neurons[n].grad[t] *= sum;
}
}
}
// update all weights
for (int t = nfold - 1; t >= 0; --t) // sum over all time...
{
for (int l = 1; l < numLayers; ++l) // except for 0th layer which has no weights
{
for (int n = 0; n < net->layers[l].numNeurons; n++) // for each neuron
{
net->layers[l].neurons[n].weights[0] += Eta *
net->layers[l].neurons[n].grad[t] * 1.0; // 1.0f = bias input
for (int i = 0; i < net->layers[l - 1].numNeurons; i++) // for each weight
{
double inputForThisNeuron = net->layers[l - 1].neurons[i].output[t];
net->layers[l].neurons[n].weights[i + 1] += Eta *
net->layers[l].neurons[n].grad[t] * inputForThisNeuron;
}
}
}
}
}
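// A rough sketch of how the routines above fit together in one training step.
// The function name, layer sizes, target values and the error convention
// (target minus output at the final fold) are assumptions for illustration,
// not code taken from this project.
#if 0 // illustrative only
void example_training_step(RNN *net, int dim, double input[], double target[])
{
int nfold = 2; // unfold twice, matching the "2-step operation" above
double errors[dim];
// run the unfolded network forward, recording per-fold outputs
forward_BPTT(net, dim, input, nfold);
// the error at the final fold drives the weight updates
rLAYER lastLayer = net->layers[net->numLayers - 1];
for (int k = 0; k < dim; ++k)
errors[k] = target[k] - lastLayer.neurons[k].output[nfold - 1];
backprop_through_time(net, errors, nfold);
}
#endif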