testing.py
import numpy as np

from multiple_linear_regression import LinearRegression

# Synthetic data: y = 20 + 9*x0 + 5*x1 + standard normal noise, with n = 1000.
X = np.column_stack((np.random.randn(1000), np.random.randn(1000)))
y = 20 + (9 * X[:, 0]) + (5 * X[:, 1]) + np.random.randn(1000)

# Test the multiple linear regression implementation.
model = LinearRegression()
model.fit(X, y)
print("weight coefs: ", model.weight_vector)
print("bias term: ", model.bias_term)
print("covariance matrix: ", model.cov_matrix)
print("t stats: ", model.t_statistics)
print("f_ stat: ", model.f_statistic)
print("p value from f stat: ", model.p_value)
print("p_values: ", model.p_values)
print("cooks distances: ", model.cooks_distance)
cookie = model.cooks_distance
print(len(cookie))
print(type(cookie))
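# A common rule of thumb flags points with Cook's distance above 4/n as
# influential (a sketch, assuming cooks_distance is a flat array with one
# value per observation).
cooks = np.ravel(np.asarray(cookie, dtype=float))
threshold = 4.0 / len(cooks)
print("points above the 4/n Cook's distance threshold:", int(np.sum(cooks > threshold)))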
# Now test Cook's distance with a categorical term...
unique_categories = np.array(['A', 'B', 'C'])
new_column = np.zeros((X.shape[0], 1), dtype=object)
# Assign a random category to each row of the new column.
for i in range(X.shape[0]):
    np.random.shuffle(unique_categories)  # shuffle anew each time
    new_column[i] = unique_categories[0]  # take the first entry; reshuffling each iteration keeps the assignment random
X_cat = np.hstack((X, new_column))
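# Quick look at the combined design matrix: two numeric columns plus the
# categorical labels in column index 2 (the index passed as `category` below).
print("X_cat sample rows:\n", X_cat[:3])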
# Now test the categorical option of linear regression
model2 = LinearRegression()
model2.fit(X_cat, y, category=2)
print(model2.weight_vector)
print(model2.cooks_distance)
# Now calculate the average Cook's distance per category (A, B, C):
print("Mean Cook's Distance Category A:", np.mean(model2.cooks_distance['A']))
print("Mean Cook's Distance Category B:", np.mean(model2.cooks_distance['B']))
print("Mean Cook's Distance Category C:", np.mean(model2.cooks_distance['C']))
# They all have roughly the same average Cook's distance, though C is the highest...
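# For context on the per-category means above: the labels were assigned
# uniformly at random, so each category should cover roughly a third of the
# 1000 rows.
for cat in ('A', 'B', 'C'):
    print(f"rows in category {cat}:", int(np.sum(new_column == cat)))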