"""
linear_regression_SGD.py (author: Anson Wong / git: ankonzoid)
"""
import os
import numpy as np
import pandas as pd
class LinearRegressorSGD:

    def __init__(self):
        pass
    def fit(self, X, y):
        #
        # Squared-error loss over the N training examples:
        #
        #   L = sum[i=1, N] (y_hat_i - y_i)^2
        #
        # where the model prediction for example i is
        #
        #   y_hat_i = a0 + a1*x_i1 + a2*x_i2 + ... + ad*x_id
        #
        # Gradient with respect to the fit parameters a_j:
        #
        #   dL/da_j = sum[i=1, N] 2*(y_hat_i - y_i)*d(y_hat_i)/da_j
        #           = 2*sum[i=1, N] x_ij*(y_hat_i - y_i)   for j = 1, 2, ..., d
        #
        #   dL/da_0 = 2*sum[i=1, N] (y_hat_i - y_i)
        #
        # Gradient descent update: a_j <- a_j - eta * dL/da_j
        #
        # SGD approximates each full-sum gradient with the gradient of a
        # single randomly sampled example per update.
        #
print("Fitting...")
# Initialize fit parameters [a0, a1, a2, ..., ad] where a0 is y-intercept
N, d = X.shape
a_fit = np.random.normal(size=d+1)
# Make gradient descent updates
loss_tolerance = 1E-6
fit_tolerance = 1E-4
eta = 1E-6 # learning rate
converge = False
loss = 9E99
iter = 0
        while not converge:
            # Compute the stochastic gradient of the squared-error loss
            # from one randomly sampled example
            i_random = np.random.choice(N)
            gradient = np.zeros(d+1)
            y_hat_i = a_fit[0] + np.dot(a_fit[1:], X[i_random])
            y_i = y[i_random]
            gradient[0] = 2*(y_hat_i - y_i)
            gradient[1:] = 2*(y_hat_i - y_i)*X[i_random]
            # Perform gradient descent step
            a_fit_new = a_fit - eta * gradient
            # Compute the full MSE loss (to track convergence)
            y_pred = a_fit_new[0] + X.dot(a_fit_new[1:])
            loss_new = np.mean((y_pred - y) ** 2)
            if n_iter % N == 0:
                print("loss = {}".format(loss_new))
            # Check whether the loss and fit parameters have converged
            if (np.abs(loss_new - loss) < loss_tolerance and
                    np.linalg.norm(a_fit_new - a_fit) < fit_tolerance):
                converge = True
            # Update fit parameters
            a_fit = a_fit_new
            loss = loss_new
            n_iter += 1
        # Save fit parameters
        self.a_fit = a_fit

    def predict(self, X):
        # Evaluate y_hat = a0 + a1*x1 + ... + ad*xd for every row of X
        return self.a_fit[0] + X.dot(self.a_fit[1:])
# Main Driver
if __name__ == "__main__":
    # Generate noisy data scattered around the line y = 0.4*x + 3
    X = np.arange(100).reshape(-1, 1)
    y = 0.4 * X.flatten() + 3 + np.random.uniform(-10, 10, size=(100,))
    model = LinearRegressorSGD()
    model.fit(X, y)
    y_pred = model.predict(X)
    mse = np.mean((y_pred - y) ** 2)
    print("mse =", mse)
    # Plot the data and the fitted line
    plt.figure(1)
    plt.plot(X.flatten(), y, 'o')
    plt.plot(X.flatten(), y_pred, 'r')
    plt.show()