# mlp_scratch.py
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt

# Generate a dataset and plot it
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)
plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
plt.show()

# Train the logistic regression classifier
clf = linear_model.LogisticRegressionCV()
clf.fit(X, y)
# Helper function to plot a decision boundary
def plot_decision_boundary(pred_func):
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the value for the whole grid
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
# Plot the decision boundary
plot_decision_boundary(lambda x: clf.predict(x))
plt.title("Logistic Regression")
plt.show()
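# Optional sketch (not part of the original script): report the linear
# baseline's training accuracy using scikit-learn's built-in score method,
# so the neural network below has a reference point to beat.
print("Logistic regression training accuracy: %.3f" % clf.score(X, y))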
num_examples = len(X)  # training set size
nn_input_dim = 2  # input layer dimensionality
nn_output_dim = 2  # output layer dimensionality
# Gradient descent parameters
epsilon = 0.01  # learning rate for gradient descent
reg_lambda = 0.01  # regularization strength
# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the cross-entropy loss
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (optional)
    data_loss += reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1. / num_examples * data_loss
# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # This is what we return at the end
    model = {}
    # Full-batch gradient descent. For each pass over the training data...
    for i in range(0, num_passes):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Back propagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        # Optionally print the loss.
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))
    return model
# Build a model with a 3-dimensional hidden layer
model = build_model(3, print_loss=True)
# Plot the decision boundary
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")
plt.show()
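# Optional sanity check (not part of the original script): report the model's
# accuracy on the training set by comparing the argmax predictions from
# `predict` above against the labels y. A rough sketch only, not a proper
# held-out evaluation.
train_accuracy = np.mean(predict(model, X) == y)
print("Training accuracy: %.3f" % train_accuracy)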
# Varying the hidden layer size
plt.figure(figsize=(16, 32))
hidden_layer_dimensions = [1, 2, 3, 4, 5, 20, 50]
for i, nn_hdim in enumerate(hidden_layer_dimensions):
    plt.subplot(4, 2, i + 1)
    plt.title('Hidden Layer size %d' % nn_hdim)
    model = build_model(nn_hdim)
    plot_decision_boundary(lambda x: predict(model, x))
plt.show()
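# Optional usage sketch (not in the original script): classify a couple of new
# points with the last model built in the loop above. The coordinates below are
# arbitrary illustrative inputs; `predict` expects a 2-D array of shape
# (n_points, 2), matching nn_input_dim.
new_points = np.array([[0.0, 0.5], [1.5, -0.5]])
print("Predicted classes for new points:", predict(model, new_points))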