-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinear_regression.py
79 lines (57 loc) · 2.26 KB
/
linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from __future__ import annotations
import numpy as np
class LinearRegression:
    """
    Fit line (weighted linear function) using Least Squares Estimation

    The coefficients are computed with ``numpy.linalg.lstsq`` on the design
    matrix augmented with a column of ones (the intercept). Compared with
    explicitly inverting ``X^T X`` this needs no ad-hoc regularisation term,
    does not bias the coefficients, and still returns a solution (the
    minimum-norm one) when the features are collinear.

    Attributes
    ----------
    betas : numpy.ndarray
        Array of shape (n_features + 1) containing fit coefficients
        (intercept first). ``None`` until ``fit`` has been called.
    rsquared : float
        Coefficient of determination. Denotes the proportion of the variance
        in the dependent variable that is predictable from the independent
        variable(s). ``None`` until ``fit`` has been called.

    Methods
    -------
    fit(X, y)
        Estimates the linear model betas (weights/coefficients)
    predict(X)
        Applies fit model on new data array and returns predictions
    """

    def __init__(self):
        self.betas = None
        self.rsquared = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LinearRegression":
        """
        Estimates the linear model betas (weights/coefficients)

        Parameters
        ----------
        X : numpy.ndarray
            Array of training samples with shape (n_samples, n_features)
        y : numpy.ndarray
            Array of training targets with shape (n_samples,)

        Returns
        -------
        LinearRegression
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If X and y do not contain the same number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if y.ndim == 0 or y.shape[0] != n_samples:
            raise ValueError(
                "X and y must contain the same number of samples"
            )

        # Prepend a column of ones so the first beta is the intercept.
        X_aug = np.column_stack((np.ones(n_samples), X))

        # SVD-based least squares: well defined even for a singular X^T X,
        # so no data-dependent ridge term (whose sign and size used to
        # follow X.min()) is required.
        self.betas = np.linalg.lstsq(X_aug, y, rcond=None)[0]

        # R squared = 1 - SS_res / SS_tot
        ss_res = ((y - self.predict(X)) ** 2).sum()
        ss_tot = ((y - np.mean(y)) ** 2).sum()
        if ss_tot == 0:
            # Constant target: the ratio is undefined. Report a perfect
            # score when the fit reproduces the target, otherwise 0.
            self.rsquared = 1.0 if np.isclose(ss_res, 0.0) else 0.0
        else:
            self.rsquared = 1 - ss_res / ss_tot
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Applies fit model on new data array and returns predictions

        Parameters
        ----------
        X : numpy.ndarray
            Array of testing samples with shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            Predicted targets, one per row of X.

        Raises
        ------
        ValueError
            The fit() method has to be called first so that the betas
            are estimated
        """
        if self.betas is None:
            raise ValueError("Betas not fitted. Call fit() method first")
        X = np.asarray(X)
        # Same augmentation as in fit(): leading ones column for the intercept.
        X_aug = np.column_stack((np.ones(X.shape[0]), X))
        return X_aug.dot(self.betas)