# regression.py
import numpy as np


class BaseRegression:
    def __init__(self, dataset):
        self.dataset = dataset
        # Use the last column as the output; the remaining columns are inputs.
        self.inputs = self.dataset[..., :-1]
        self.outputs = self.dataset[..., -1:]
        # Store the number of training examples and the number of features.
        self.len_dataset, self.features_count = self.inputs.shape
        # Create a randomized weight vector (one extra weight for the bias term).
        self.weights = np.random.uniform(0, 1, self.features_count + 1)
        # Training epoch counter.
        self.epoch = 1
        # Some precomputed matrices/vectors for later use:
        # - Input matrix with a leading column of ones (bias term): used when
        #   predicting on the training inputs and in the gradient.
        self.input_vectors = np.concatenate([np.ones((self.len_dataset, 1)), self.inputs], axis=1)
        # - Transposed input matrix: used in the gradient and the normal equation.
        self.trans_input = np.transpose(self.input_vectors)
        # - Flattened outputs: used in the error.
        self.output_vector = self.outputs.flatten()
        # - Output vector tiled to a matrix with len(weights) rows.
        self.cloned_output = np.tile(self.output_vector, (len(self.weights), 1))
    def hypothesis(self, vector):
        raise NotImplementedError

    def calculate(self, vector):
        # Proxy method for hypothesis.
        return self.hypothesis(vector)

    @property
    def error(self):
        raise NotImplementedError

    def normal_equation(self):
        raise NotImplementedError

    @property
    def accuracy(self):
        # Calculate accuracy based on the error function.
        return round(100.00 - self.error * 100.00, 4)

    @property
    def predicted(self):
        # Predictions for the training set under the current weight vector.
        return np.array([self.hypothesis(x) for x in self.inputs])

    @property
    def cloned_predicted(self):
        # Predicted vector tiled to a matrix with len(weights) rows.
        return np.tile(self.predicted, (len(self.weights), 1))

    def regularization(self, delta=0.1):
        # L2 regularization term.
        return (delta / self.len_dataset) * np.power(self.weights, 2).sum()
    def gradient(self, speed):
        # One iteration of batch gradient descent.
        permit = speed / self.len_dataset
        self.weights -= ((self.cloned_predicted - self.cloned_output) * self.trans_input).sum(axis=1) * permit
        self.epoch += 1

    def train_gradient(self, timeout=3000):
        # Gradient descent with an automatically decreasing learning rate:
        # the rate is halved whenever the error grows.
        speed = 0.1
        iteration = 0
        latest_error = 1
        while iteration <= timeout:
            error = self.error
            if -0.0001 < error < 0.0001:
                break
            if error > latest_error:
                speed = speed * 0.5
            self.gradient(speed)
            latest_error = error
            iteration += 1
    def show_info(self, show_predicted_vs_output=False):
        predicted_vs_output = ""
        if show_predicted_vs_output:
            predicted_vs_output = [f"{p} - {o}\n" for p, o in zip(self.predicted, self.output_vector)]
        print(f"========== {self.__class__.__name__} ========= \n"
              f"Count of Features: {self.features_count} | Len of Dataset: {self.len_dataset} | Epoch: {self.epoch} \n"
              f"Weights: {self.weights}\n"
              f"Accuracy: {self.accuracy}% | Error: {self.error}\n"
              f"{''.join(predicted_vs_output)}"
              f"=====================================\n")


class LinearRegression(BaseRegression):
    @property
    def error(self):
        # Half the mean squared error of the training set (scalar).
        return (1 / (self.len_dataset * 2)) * np.power(self.predicted - self.output_vector, 2).sum()

    def hypothesis(self, vector):
        # Linear hypothesis: dot product of the weights with the input vector,
        # prepended with 1 for the bias term.
        return np.dot(self.weights, np.concatenate([np.ones(1), vector]))

    def normal_equation(self):
        # Normal equation: theta = (X^T X)^-1 X^T y.
        theta = np.linalg.inv(self.trans_input.dot(self.input_vectors)).dot(self.trans_input).dot(self.outputs)
        self.weights = theta.flatten()
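

# Usage sketch (illustrative addition, not part of the original API): fit a
# line to a synthetic y = 2x + 1 dataset with both training methods. The
# column layout (features first, target last) is what BaseRegression expects.
def _demo_linear_regression():
    xs = np.linspace(0.0, 1.0, 50).reshape(-1, 1)
    ys = 2.0 * xs + 1.0
    model = LinearRegression(np.concatenate([xs, ys], axis=1))
    model.train_gradient()
    model.show_info()  # weights should approach [1, 2] (bias, slope)
    model.normal_equation()
    model.show_info()  # closed-form solution for the same weights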


class LogisticRegression(BaseRegression):
    def hypothesis(self, vector):
        # Sigmoid activation of the linear combination of the input vector
        # (prepended with 1 for the bias term) and the current weights.
        z = np.dot(self.weights, np.concatenate([np.ones(1), vector]))
        return 1.0 / (1 + np.exp(-z))

    @property
    def error(self):
        # Cross-entropy error of the training set (scalar).
        cost = -self.output_vector * np.log(self.predicted) - (1 - self.output_vector) * np.log(1 - self.predicted)
        return cost.sum() / self.len_dataset
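

# Usage sketch (illustrative addition, not part of the original API): binary
# classification of a 1-D threshold problem. Labels must be 0/1, since the
# cross-entropy error takes log(predicted) and log(1 - predicted).
def _demo_logistic_regression():
    xs = np.linspace(-3.0, 3.0, 60).reshape(-1, 1)
    labels = (xs > 0).astype(float)
    model = LogisticRegression(np.concatenate([xs, labels], axis=1))
    model.train_gradient()
    model.show_info()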


class SVM(LogisticRegression):
    def __init__(self, dataset):
        super().__init__(dataset)
        # No bias term: one weight per feature.
        self.weights = np.random.uniform(0, 1, self.features_count)

    def hypothesis(self, vector):
        return np.dot(self.weights, vector)

    def train_gradient(self, timeout=30000):
        # Stochastic gradient descent on the hinge loss with L2
        # regularization. Labels are expected to be -1 or +1.
        speed = 1
        for i in range(1, timeout):
            for x, y in zip(self.inputs, self.output_vector):
                if y * self.hypothesis(x) < 1:
                    # Inside the margin or misclassified: step towards the example.
                    self.weights = self.weights + speed * ((y * x) - (2 * (1 / timeout) * self.weights))
                else:
                    # Correctly classified: apply only the regularization shrinkage.
                    self.weights = self.weights + speed * (-2 * (1 / timeout) * self.weights)
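

# Usage sketch (illustrative addition, not part of the original API): the SVM
# expects labels in {-1, +1} and, having no bias weight, learns a separating
# hyperplane through the origin, so the two clusters below straddle it.
def _demo_svm():
    pos = np.random.uniform(0.5, 2.0, (30, 2))
    neg = np.random.uniform(-2.0, -0.5, (30, 2))
    xs = np.concatenate([pos, neg])
    labels = np.concatenate([np.ones((30, 1)), -np.ones((30, 1))])
    model = SVM(np.concatenate([xs, labels], axis=1))
    model.train_gradient(timeout=1000)
    print("SVM weights:", model.weights)


if __name__ == "__main__":
    _demo_linear_regression()
    _demo_logistic_regression()
    _demo_svm()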