-
Notifications
You must be signed in to change notification settings - Fork 17
/
demhmc3.m
135 lines (116 loc) · 4.41 KB
/
demhmc3.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
%DEMHMC3 Demonstrate Bayesian regression with Hybrid Monte Carlo sampling.
%
% Description
% The problem consists of one input variable X and one target variable
% T. The inputs X are drawn from a Gaussian distribution with mean 0.25
% and standard deviation 0.1, and the target data are generated by
% computing SIN(2*PI*X) and adding Gaussian noise. The model is a
% 2-layer network with linear outputs, and the hybrid Monte Carlo
% algorithm (with persistence) is used to sample from the posterior
% distribution of the weights. The graph shows the underlying function,
% 100 samples from the function given by the posterior distribution of
% the weights, and the average prediction (weighted by the posterior
% probabilities).
%
% See also
% DEMHMC2, HMC, MLP, MLPERR, MLPGRAD
%
% Copyright (c) Ian T Nabney (1996-2001)
% Generate the matrix of inputs x and targets t.
ndata = 20; % Number of data points.
noise = 0.1; % Standard deviation of noise distribution.
nin = 1; % Number of inputs.
nout = 1; % Number of outputs.
seed = 42; % Seed for random number generators.
% Fix the state of both generators so the demo is reproducible.
randn('state', seed);
rand('state', seed);
% Inputs are concentrated around 0.25: x ~ N(0.25, 0.01), so the data
% density is high near the centre and low towards the interval edges.
x = 0.25 + 0.1*randn(ndata, nin);
% Targets: noise-corrupted sine of the inputs.
t = sin(2*pi*x) + noise*randn(size(x));
clc
disp('This demonstration illustrates the use of the hybrid Monte Carlo')
disp('algorithm to sample from the posterior weight distribution of a')
disp('multi-layer perceptron.')
disp(' ')
disp('A regression problem is used, with the one-dimensional data drawn')
disp('from a noisy sine function. The x values are sampled from a normal')
disp('distribution with mean 0.25 and variance 0.01.')
disp(' ')
disp('First we initialise the network.')
disp(' ')
disp('Press any key to continue.')
pause
% Set up network parameters.
nhidden = 5; % Number of hidden units.
alpha = 0.001; % Coefficient of weight-decay prior.
beta = 100.0; % Coefficient of data error.
% Create and initialize network model.
% Initialise weights reasonably close to 0
% (second argument to mlpinit controls the spread of the initial
% weight distribution -- see the Netlab mlpinit documentation).
net = mlp(nin, nhidden, nout, 'linear', alpha, beta);
net = mlpinit(net, 10);
clc
disp('Next we take 100 samples from the posterior distribution. The first')
disp('300 samples at the start of the chain are omitted. As persistence')
disp('is used, the momentum has a small random component added at each step.')
disp('10 iterations are used at each step (compared with 100 in demhmc2).')
disp('The step size is 0.005 (compared with 0.002).')
disp('The new state is accepted if the threshold')
disp('value is greater than a random number between 0 and 1.')
disp(' ')
disp('Negative step numbers indicate samples discarded from the start of the')
disp('chain.')
disp(' ')
disp('Press any key to continue.')
pause
% Set up vector of options for hybrid Monte Carlo.
nsamples = 100; % Number of retained samples.
options = foptions; % Default options vector.
options(1) = 1; % Switch on diagnostics.
options(5) = 1; % Use persistence (partial momentum refresh between steps).
options(7) = 10; % Number of steps in each trajectory.
options(14) = nsamples; % Number of Monte Carlo samples returned.
options(15) = 300; % Number of samples omitted at start of chain (burn-in).
options(17) = 0.95; % Alpha value in persistence
options(18) = 0.005; % Step size.
% Pack the network weights into a single parameter vector for the sampler.
w = mlppak(net);
% Initialise HMC
% (seeds the sampler's internal random state for reproducibility).
hmc('state', 42);
% Draw weight samples from the posterior; 'neterr' and 'netgrad' supply
% the network error function and its gradient to the sampler.
[samples, energies] = hmc('neterr', w, options, 'netgrad', net, x, t);
clc
disp('The plot shows the underlying noise free function, the 100 samples')
disp('produced from the MLP, and their average as a Monte Carlo estimate')
disp('of the true posterior average.')
disp(' ')
disp('Press any key to continue.')
pause
nplot = 300; % Number of equally spaced points at which to evaluate predictions.
plotvals = [0 : 1/(nplot - 1) : 1]';
pred = zeros(size(plotvals)); % Accumulator for the Monte Carlo average.
fh1 = figure;
hold on
% Plot each sampled network's prediction curve and accumulate their sum.
for k = 1:nsamples
w2 = samples(k,:);
% Unpack the k-th sampled weight vector into a network structure.
net2 = mlpunpak(net, w2);
y = mlpfwd(net2, plotvals);
% Sum predictions
pred = pred + y;
h4 = plot(plotvals, y, '-r', 'LineWidth', 1);
end
% Monte Carlo estimate of the posterior mean prediction.
pred = pred./nsamples;
% Plot data
h1 = plot(x, t, 'ob', 'LineWidth', 2, 'MarkerFaceColor', 'blue');
axis([0 1 -3 3])
% Plot function
% (fplot evaluates the noise-free sine and returns the sampled points,
% which are then re-plotted with an explicit handle for the legend).
[fx, fy] = fplot('sin(2*pi*x)', [0 1], '--g');
h2 = plot(fx, fy, '--g', 'LineWidth', 2);
set(gca, 'box', 'on');
% Plot averaged prediction
h3 = plot(plotvals, pred, '-c', 'LineWidth', 2);
lstrings = char('Data', 'Function', 'Prediction', 'Samples');
% Legend position 3 places it in the lower-left corner (legacy syntax).
legend([h1 h2 h3 h4], lstrings, 3);
hold off
disp('Note how the predictions become much further from the true function')
disp('away from the region of high data density.')
disp(' ')
disp('Press any key to exit.')
pause
% Clean up the figure and the workspace created by this demo.
close(fh1);
clear all;