Commit

Add files via upload

SabeenN authored May 19, 2019
1 parent 34292bb commit 3054a17
Showing 12 changed files with 448 additions and 0 deletions.
12 changes: 12 additions & 0 deletions Speech_Recognition/k-layer net w BN/BatchNormBackPass.m
@@ -0,0 +1,12 @@
function g = BatchNormBackPass(g, si, mui, vari)
% Propagates the gradient g (m x n) backwards through the batch
% normalization of layer i, given the unnormalized scores si and the
% batch mean mui and variance vari (both m x 1).
eps = 1e-6;
n = size(g,2);
one_v = ones(n,1);
sigma1 = (vari + eps).^(-0.5);
sigma2 = (vari + eps).^(-1.5);
G1 = g.*(sigma1*one_v');
G2 = g.*(sigma2*one_v');
D = si - mui*one_v';                 % centred scores
c = (G2.*D)*one_v;
g = G1 - (1/n)*(G1*one_v)*one_v' - (1/n)*(D.*(c*one_v'));
end
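For reference, the update above is the standard batch-normalization backward pass; reconstructed here from the code itself, with ⊙ denoting element-wise multiplication and 1_n the n-vector of ones:

$$\sigma_1 = (v+\epsilon)^{-1/2}, \qquad \sigma_2 = (v+\epsilon)^{-3/2}, \qquad G_1 = g \odot (\sigma_1 \mathbf{1}_n^\top), \qquad G_2 = g \odot (\sigma_2 \mathbf{1}_n^\top)$$

$$D = s - \mu\,\mathbf{1}_n^\top, \qquad c = (G_2 \odot D)\,\mathbf{1}_n, \qquad g \leftarrow G_1 - \tfrac{1}{n}\,(G_1 \mathbf{1}_n)\,\mathbf{1}_n^\top - \tfrac{1}{n}\, D \odot (c\,\mathbf{1}_n^\top)$$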
4 changes: 4 additions & 0 deletions Speech_Recognition/k-layer net w BN/BatchNormalize.m
@@ -0,0 +1,4 @@
function shat = BatchNormalize(s, mu, v)
% Normalizes each dimension of the scores s (m x n) to zero mean and unit
% variance using the supplied mean mu and variance v (both m x 1).
eps = 1e-6;
n = size(s,2);
shat = (s - mu*ones(1,n)) ./ ((v + eps).^0.5 * ones(1,n));
end
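In equation form, with ε = 10⁻⁶ guarding against division by zero:

$$\hat{s} = \operatorname{diag}(v + \epsilon)^{-1/2}\,\bigl(s - \mu\,\mathbf{1}_n^\top\bigr)$$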
12 changes: 12 additions & 0 deletions Speech_Recognition/k-layer net w BN/ComputeAccuracy.m
@@ -0,0 +1,12 @@
function acc = ComputeAccuracy(X, y, W, b, gamma, beta)
% Fraction of samples in X whose predicted class matches the true label in y.
% Note that batch-norm statistics are computed on X itself here.
[P,~,~,~,~] = EvaluateClassifier(X, W, b, gamma, beta);
[~,I] = max(P, [], 1);              % predicted class = row index of the largest probability
acc = sum(I(:) == y(:)) / numel(y); % y holds the true labels
end
18 changes: 18 additions & 0 deletions Speech_Recognition/k-layer net w BN/ComputeCost.m
@@ -0,0 +1,18 @@
function J = ComputeCost(X, Y, W, b, lambda, gamma, beta, varargin)
% Cross-entropy cost plus L2 regularization. If precomputed means and
% variances are supplied in varargin, they are used for the normalization
% (test-time behaviour); otherwise batch statistics are computed.
k = size(W,2);
if numel(varargin) == 2
    [P,~,~,~,~] = EvaluateClassifier(X, W, b, gamma, beta, varargin{1}, varargin{2});
else
    [P,~,~,~,~] = EvaluateClassifier(X, W, b, gamma, beta);
end
n = size(X,2);
py = sum(Y .* P, 1);        % Y is one-hot, so py(i) = p(y_i | x_i)
l = -log(py);
L2 = 0;
for i = 1:k
    L2 = L2 + sum(W{i}(:).^2);
end
J = sum(l)/n + lambda*L2;
end
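The quantity computed here is the regularized cross-entropy loss,

$$J = -\frac{1}{n} \sum_{i=1}^{n} \log\bigl(\mathbf{y}_i^\top \mathbf{p}_i\bigr) + \lambda \sum_{l=1}^{k} \lVert W_l \rVert_F^2,$$

where y_i is the one-hot label vector and p_i the softmax output for sample i.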
35 changes: 35 additions & 0 deletions Speech_Recognition/k-layer net w BN/ComputeGradients.m
@@ -0,0 +1,35 @@
function [gradW, gradb, gradGamma, gradBeta] = ComputeGradients(X, Y, P, s, W, lambda, shat, mu, v, gamma, beta, Xs)
% Analytic gradients for a k-layer network with batch normalization.
% Xs{l} is the input to layer l from the forward pass: Xs{1} is the data
% and Xs{l} the post-ReLU activation feeding layer l.
k = size(W,2);
n = size(X,2);
gradW = cell(1,k);
gradb = cell(1,k);
gradGamma = cell(1,k-1);
gradBeta = cell(1,k-1);
Ic = ones(n,1);                          % n x 1 vector of ones

g = -(Y - P);                            % gradient wrt the final scores; Y and P are K x N
gradW{k} = (1/n)*(g*Xs{k}') + 2*lambda*W{k};
gradb{k} = (1/n)*(g*Ic);
g = W{k}'*g;
g = g .* (Xs{k} > 0);                    % back through the ReLU feeding layer k

for l = k-1:-1:1
    gradGamma{l} = ((g .* shat{l})*Ic)/n;
    gradBeta{l} = (g*Ic)/n;
    g = g .* (gamma{l}*Ic');             % back through the scale gamma{l}
    g = BatchNormBackPass(g, s{l}, mu{l}, v{l});
    gradW{l} = (g*Xs{l}')/n + 2*lambda*W{l};
    gradb{l} = (g*Ic)/n;
    if l > 1
        g = (W{l}')*g;
        g = g .* (Xs{l} > 0);            % back through the ReLU feeding layer l
    end
end
end
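Reconstructed from the code, the per-layer gradients are

$$\frac{\partial J}{\partial W_l} = \frac{1}{n}\, g\, X_l^\top + 2\lambda W_l, \qquad \frac{\partial J}{\partial b_l} = \frac{1}{n}\, g\, \mathbf{1}_n, \qquad \frac{\partial J}{\partial \gamma_l} = \frac{1}{n}\,\bigl(g \odot \hat{s}_l\bigr)\,\mathbf{1}_n, \qquad \frac{\partial J}{\partial \beta_l} = \frac{1}{n}\, g\, \mathbf{1}_n,$$

with g propagated backwards through the scale-and-shift, the batch normalization (BatchNormBackPass) and the ReLU between layers.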
71 changes: 71 additions & 0 deletions Speech_Recognition/k-layer net w BN/ComputeGradsNumSlow.m
@@ -0,0 +1,71 @@
function [grad_b, grad_W, grad_gamma, grad_beta] = ComputeGradsNumSlow(X, Y, W, b, lambda, h, gamma, beta)
% Numerical gradients by centred finite differences: every parameter is
% perturbed by +/- h and the resulting cost difference is divided by 2h.

grad_W = cell(1,numel(W));
grad_b = cell(1,numel(b));

grad_gamma = cell(1,numel(gamma));
grad_beta = cell(1,numel(beta));

for j=1:length(b)
grad_b{j} = zeros(size(b{j}));

for i=1:length(b{j})

b_try = b;
b_try{j}(i) = b_try{j}(i) - h;
c1 = ComputeCost(X, Y, W, b_try, lambda,gamma,beta);

b_try = b;
b_try{j}(i) = b_try{j}(i) + h;
c2 = ComputeCost(X, Y, W, b_try, lambda,gamma,beta);

grad_b{j}(i) = (c2-c1) / (2*h);
end
end

for j=1:length(W)
grad_W{j} = zeros(size(W{j}));

for i=1:numel(W{j})

W_try = W;
W_try{j}(i) = W_try{j}(i) - h;
c1 = ComputeCost(X, Y, W_try, b, lambda,gamma,beta);

W_try = W;
W_try{j}(i) = W_try{j}(i) + h;
c2 = ComputeCost(X, Y, W_try, b, lambda,gamma,beta);

grad_W{j}(i) = (c2-c1) / (2*h);
end
end

for j=1:length(gamma)
grad_gamma{j} = zeros(size(gamma{j}));
for i=1:numel(gamma{j})

gammas_try = gamma;
gammas_try{j}(i) = gamma{j}(i) - h;
c1 = ComputeCost(X, Y, W, b, lambda,gammas_try,beta);

gammas_try = gamma;
gammas_try{j}(i) = gamma{j}(i) + h;
c2 = ComputeCost(X, Y, W, b, lambda,gammas_try,beta);

grad_gamma{j}(i) = (c2-c1) / (2*h);
end
end
for j=1:length(beta)
grad_beta{j} = zeros(size(beta{j}));
for i=1:numel(beta{j})

beta_try = beta;
beta_try{j}(i) = beta{j}(i) - h;
c1 = ComputeCost(X, Y, W, b, lambda,gamma,beta_try);

beta_try = beta;
beta_try{j}(i) = beta{j}(i) + h;
c2 = ComputeCost(X, Y, W, b, lambda,gamma,beta_try);
grad_beta{j}(i) = (c2-c1) / (2*h);
end
end
end
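A minimal sketch of how this check might be wired together with the analytic gradients; all sizes, the tolerance guard and the choice lambda = 0 are illustrative assumptions, not values from this commit:

% Hypothetical gradient check on a tiny 3-layer network.
d = 10; n = 5; m = [8 6 4];                        % illustrative layer sizes
[W, b, gamma, beta] = InitializeWb(m, d);
X = randn(d, n);
labels = randi(m(end), 1, n);                      % random class labels
Y = zeros(m(end), n);
Y(sub2ind(size(Y), labels, 1:n)) = 1;              % one-hot encoding
lambda = 0; h = 1e-5;

[P, s, shat, mu, v, Xs] = EvaluateClassifier(X, W, b, gamma, beta);
[gW, gb, gGamma, gBeta] = ComputeGradients(X, Y, P, s, W, lambda, shat, mu, v, gamma, beta, Xs);
[ngb, ngW, ngGamma, ngBeta] = ComputeGradsNumSlow(X, Y, W, b, lambda, h, gamma, beta);

for i = 1:numel(W)                                 % relative error per weight matrix
    relerr = norm(gW{i}(:) - ngW{i}(:)) / max(1e-12, norm(ngW{i}(:)));
    fprintf('layer %d: relative W-gradient error %.2e\n', i, relerr);
end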
85 changes: 85 additions & 0 deletions Speech_Recognition/k-layer net w BN/ComputeGradsNumSlow1.m
@@ -0,0 +1,85 @@
function Grads = ComputeGradsNumSlow1(X, Y, NetParams, lambda, h)
% Struct-based variant of the numerical gradient check. Note that it calls
% ComputeCost(X, Y, NetParams, lambda), i.e. it expects a ComputeCost that
% takes a NetParams struct, which does not match ComputeCost.m in this folder.

Grads.W = cell(numel(NetParams.W), 1);
Grads.b = cell(numel(NetParams.b), 1);
if NetParams.use_bn
Grads.gammas = cell(numel(NetParams.gammas), 1);
Grads.betas = cell(numel(NetParams.betas), 1);
end

for j=1:length(NetParams.b)
Grads.b{j} = zeros(size(NetParams.b{j}));
NetTry = NetParams;
for i=1:length(NetParams.b{j})
b_try = NetParams.b;
b_try{j}(i) = b_try{j}(i) - h;
NetTry.b = b_try;
c1 = ComputeCost(X, Y, NetTry, lambda);

b_try = NetParams.b;
b_try{j}(i) = b_try{j}(i) + h;
NetTry.b = b_try;
c2 = ComputeCost(X, Y, NetTry, lambda);

Grads.b{j}(i) = (c2-c1) / (2*h);
end
end

for j=1:length(NetParams.W)
Grads.W{j} = zeros(size(NetParams.W{j}));
NetTry = NetParams;
for i=1:numel(NetParams.W{j})

W_try = NetParams.W;
W_try{j}(i) = W_try{j}(i) - h;
NetTry.W = W_try;
c1 = ComputeCost(X, Y, NetTry, lambda);

W_try = NetParams.W;
W_try{j}(i) = W_try{j}(i) + h;
NetTry.W = W_try;
c2 = ComputeCost(X, Y, NetTry, lambda);

Grads.W{j}(i) = (c2-c1) / (2*h);
end
end

if NetParams.use_bn
for j=1:length(NetParams.gammas)
Grads.gammas{j} = zeros(size(NetParams.gammas{j}));
NetTry = NetParams;
for i=1:numel(NetParams.gammas{j})

gammas_try = NetParams.gammas;
gammas_try{j}(i) = gammas_try{j}(i) - h;
NetTry.gammas = gammas_try;
c1 = ComputeCost(X, Y, NetTry, lambda);

gammas_try = NetParams.gammas;
gammas_try{j}(i) = gammas_try{j}(i) + h;
NetTry.gammas = gammas_try;
c2 = ComputeCost(X, Y, NetTry, lambda);

Grads.gammas{j}(i) = (c2-c1) / (2*h);
end
end

for j=1:length(NetParams.betas)
Grads.betas{j} = zeros(size(NetParams.betas{j}));
NetTry = NetParams;
for i=1:numel(NetParams.betas{j})

betas_try = NetParams.betas;
betas_try{j}(i) = betas_try{j}(i) - h;
NetTry.betas = betas_try;
c1 = ComputeCost(X, Y, NetTry, lambda);

betas_try = NetParams.betas;
betas_try{j}(i) = betas_try{j}(i) + h;
NetTry.betas = betas_try;
c2 = ComputeCost(X, Y, NetTry, lambda);

Grads.betas{j}(i) = (c2-c1) / (2*h);
end
end
end
end
28 changes: 28 additions & 0 deletions Speech_Recognition/k-layer net w BN/EvaluateClassifier.m
@@ -0,0 +1,28 @@
function [P,s,shat,mu,v,Xs] = EvaluateClassifier(X, W, b, gamma, beta, varargin)
% Forward pass of a k-layer network with batch normalization.
% Returns the class probabilities P, the per-layer scores s, the
% normalized scores shat, the means mu and variances v used for the
% normalization, and the per-layer inputs Xs (Xs{1} is the data,
% Xs{l} the post-ReLU activation feeding layer l).
k = size(W,2);
s = cell(1,k);
shat = cell(1,k);
shift = cell(1,k);
Xs = cell(1,k);
n = size(X,2);

% Use precomputed (e.g. moving-average) statistics if supplied;
% otherwise compute batch statistics below.
if numel(varargin) == 2
    mu = varargin{1};
    v = varargin{2};
else
    mu = cell(1,k);
    v = cell(1,k);
end

for l = 1:k-1
    Xs{l} = X;
    s{l} = W{l}*X + b{l}*ones(1,n);
    if numel(varargin) ~= 2
        mu{l} = mean(s{l}, 2);
        v{l} = var(s{l}, 1, 2);         % biased variance (normalized by n)
    end
    shat{l} = BatchNormalize(s{l}, mu{l}, v{l});
    shift{l} = repmat(gamma{l},1,n) .* shat{l} + repmat(beta{l},1,n);   % scale and shift
    X = max(0, shift{l});               % ReLU
end
Xs{k} = X;
s{k} = W{k}*X + b{k}*ones(1,n);
P = softmax(s{k});
end
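At test time the forward pass would presumably be run with the moving-average statistics collected during training instead of batch statistics, along these lines (illustrative):

% Hypothetical test-time forward pass using moving-average BN statistics.
[P_test,~,~,~,~] = EvaluateClassifier(Xtest, W, b, gamma, beta, movingMu, movingVar);
[~, predictions] = max(P_test, [], 1);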
20 changes: 20 additions & 0 deletions Speech_Recognition/k-layer net w BN/InitializeWb.m
@@ -0,0 +1,20 @@
function [W, b, gamma, beta] = InitializeWb(m, d)
% Initialization of weights, biases and batch-norm parameters.
% Layer i draws its weights from a zero-mean Gaussian with standard
% deviation 1/sqrt(fan-in); biases and shifts beta start at zero and the
% scales gamma start close to one.
k = size(m,2);                  % number of layers
W = cell(1,k);
b = cell(1,k);
gamma = cell(1,k-1);
beta = cell(1,k-1);
rng(400);                       % fixed seed for reproducibility
for i = k:-1:2
    W{i} = normrnd(0, 1/sqrt(m(i-1)), m(i), m(i-1));   % m(i) x m(i-1)
    b{i} = zeros(m(i),1);
end
for i = k-1:-1:1
    gamma{i} = normrnd(1, 0.0005, m(i), 1);
    beta{i} = zeros(m(i),1);
end
W{1} = normrnd(0, 1/sqrt(d), m(1), d);                 % m(1) x d, d = input dimension
b{1} = zeros(m(1),1);
end
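As a usage sketch, a three-layer network with two 50-unit hidden layers and 10 output classes on d-dimensional input would be set up as follows (sizes are illustrative):

d = 130;                                           % illustrative input dimension
[W, b, gamma, beta] = InitializeWb([50 50 10], d);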
19 changes: 19 additions & 0 deletions Speech_Recognition/k-layer net w BN/LoadBatch.m
@@ -0,0 +1,19 @@
function [X, Y, y] = LoadBatch(filename)
% Loads MFCC features with labels from a .mat file and standardizes every
% feature dimension to zero mean and unit variance over the samples.
A = load(filename);
X = A.mfccs;
y = cast(A.labels', 'single');      % true class labels
Y = cast(A.onehot', 'single');      % one-hot label matrix
X = reshape(X, [], size(X,1));      % one column per sample

meanX = mean(X, 2);
stdX = std(X, 0, 2);
X = X - repmat(meanX, [1, size(X, 2)]);
X = X ./ repmat(stdX, [1, size(X, 2)]);
end
67 changes: 67 additions & 0 deletions Speech_Recognition/k-layer net w BN/MiniBatchGD.m
@@ -0,0 +1,67 @@
function [W, b, gamma, beta, costs, costsv, xaxis, movingMu, movingVar] = MiniBatchGD(X, Y, valx, valy, cycleparams, W, b, lambda, gamma, beta)
% Mini-batch gradient descent with a cyclic (triangular) learning rate.
% Exponential moving averages of the batch-norm statistics are maintained
% and returned for use at test time.

%% cyclic learning rate hyperparameters
N = size(X,2);
k = size(W,2);
costs = [];
costsv = [];
xaxis = [];
nmin = cycleparams(1);      % lower learning-rate bound
nmax = cycleparams(2);      % upper learning-rate bound
ns = cycleparams(3);        % half-cycle length in update steps
nbatch = cycleparams(4);    % mini-batch size
epochs = cycleparams(5);    % epochs per cycle
lmax = cycleparams(6);      % number of cycles minus one
alpha = 0.99;               % decay rate of the moving BN statistics
%%
t = 0;
for l = 0:lmax
    for epoch = 1:epochs
        for j = 1:N/nbatch
            % triangular schedule: ramp the learning rate up, then down, within each cycle
            if (t >= 2*l*ns) && (t <= (2*l+1)*ns)
                nt = nmin + ((t - 2*l*ns)*(nmax - nmin))/ns;
            end
            if ((2*l+1)*ns < t) && (2*(l+1)*ns >= t)
                nt = nmax - ((t - (2*l+1)*ns)*(nmax - nmin))/ns;
            end
            jstart = (j-1)*nbatch + 1;
            jend = j*nbatch;
            Xbatch = X(:, jstart:jend);
            Ybatch = Y(:, jstart:jend);
            [P,s,shat,mu,v,Xs] = EvaluateClassifier(Xbatch, W, b, gamma, beta);
            [gW,gb,ggamma,gbeta] = ComputeGradients(Xbatch, Ybatch, P, s, W, lambda, shat, mu, v, gamma, beta, Xs);
            for i = 1:k
                W{i} = W{i} - nt*gW{i};
                b{i} = b{i} - nt*gb{i};
            end
            for i = 1:k-1
                gamma{i} = gamma{i} - nt*ggamma{i};
                beta{i} = beta{i} - nt*gbeta{i};
            end
            if t == 0
                movingMu = mu;
                movingVar = v;
            end
            for i = 1:k-1
                movingMu{i} = alpha*movingMu{i} + (1-alpha)*mu{i};
                movingVar{i} = alpha*movingVar{i} + (1-alpha)*v{i};
            end
            if mod(t,500) == 0      % log training and validation cost every 500 updates
                costs = [costs, ComputeCost(Xbatch, Ybatch, W, b, lambda, gamma, beta, movingMu, movingVar)];
                costsv = [costsv, ComputeCost(valx, valy, W, b, lambda, gamma, beta, movingMu, movingVar)];
                xaxis = [xaxis, t];
            end
            t = t + 1;
        end
        perm = randperm(size(X,2));     % reshuffle the training data after each epoch
        X = X(:,perm);
        Y = Y(:,perm);
    end
end
end
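None of the files above include a driver script, so here is a minimal sketch of how they might be tied together; the file names, layer sizes and cycle parameters are assumptions, not values taken from this commit:

% Hypothetical end-to-end training script (all names and sizes illustrative).
[Xtrain, Ytrain, ytrain] = LoadBatch('train_data.mat');   % assumed file name
[Xval,   Yval,   yval]   = LoadBatch('val_data.mat');     % assumed file name

d = size(Xtrain, 1);
m = [50 50 10];                        % two hidden layers, 10 classes (illustrative)
[W, b, gamma, beta] = InitializeWb(m, d);

% cycleparams = [nmin nmax ns nbatch epochs lmax]
cycleparams = [1e-5, 1e-1, 800, 100, 16, 1];
lambda = 0.005;

[W, b, gamma, beta, costs, costsv, xaxis, movingMu, movingVar] = ...
    MiniBatchGD(Xtrain, Ytrain, Xval, Yval, cycleparams, W, b, lambda, gamma, beta);

acc = ComputeAccuracy(Xval, yval, W, b, gamma, beta);
fprintf('validation accuracy: %.2f%%\n', 100*acc);

plot(xaxis, costs, xaxis, costsv);     % training vs validation cost curves
legend('training', 'validation'); xlabel('update step'); ylabel('cost');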