forked from fangshi1991/gplearn_stock
-
Notifications
You must be signed in to change notification settings - Fork 2
/
fitness.py
177 lines (139 loc) · 6.82 KB
/
fitness.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# encoding:utf-8
"""Metrics to evaluate the fitness of a program.
The :mod:`gplearn.fitness` module contains some metric with which to evaluate
the computer programs created by the :mod:`gplearn.genetic` module.
"""
# Author: Trevor Stephens <trevorstephens.com>
#
# License: BSD 3 clause
import numbers
import numpy as np
from joblib import wrap_non_picklable_objects
from scipy.stats import rankdata
__all__ = ['make_fitness']
class _Fitness(object):
"""A metric to measure the fitness of a program.
This object is able to be called with NumPy vectorized arguments and return
a resulting floating point score quantifying the quality of the program's
representation of the true relationship.
Parameters
----------
function : callable
A function with signature function(y, y_pred, sample_weight) that
returns a floating point number. Where `y` is the input target y
vector, `y_pred` is the predicted values from the genetic program, and
sample_weight is the sample_weight vector.
greater_is_better : bool
Whether a higher value from `function` indicates a better fit. In
general this would be False for metrics indicating the magnitude of
the error, and True for metrics indicating the quality of fit.
"""
def __init__(self, function, greater_is_better,stock_is = None):
self.function = function
self.stock_is = stock_is
self.greater_is_better = greater_is_better
self.sign = 1 if greater_is_better else -1
def __call__(self, *args):
return self.function(*args)
def make_fitness(function, greater_is_better, wrap=True, stock_is=None):
    """Make a fitness measure, a metric scoring the quality of a program's fit.

    This factory function creates a fitness measure object which measures the
    quality of a program's fit and thus its likelihood to undergo genetic
    operations into the next generation. The resulting object is able to be
    called with NumPy vectorized arguments and return a resulting floating
    point score quantifying the quality of the program's representation of the
    true relationship.

    Parameters
    ----------
    function : callable
        A function with signature function(y, y_pred, sample_weight) that
        returns a floating point number. Where `y` is the input target y
        vector, `y_pred` is the predicted values from the genetic program, and
        sample_weight is the sample_weight vector.
    greater_is_better : bool
        Whether a higher value from `function` indicates a better fit. In
        general this would be False for metrics indicating the magnitude of
        the error, and True for metrics indicating the quality of fit.
    wrap : bool, optional (default=True)
        When running in parallel, pickling of custom metrics is not supported
        by Python's default pickler. This option will wrap the function using
        cloudpickle allowing you to pickle your solution, but the evolution may
        run slightly more slowly. If you are running single-threaded in an
        interactive Python session or have no need to save the model, set to
        `False` for faster runs.
    stock_is : optional (default=None)
        Forwarded to `_Fitness`; set to True to tag the metric as the
        stock-dedicated fitness (mirrors the module-level `stock_dedicated`
        instance, which previously could not be recreated via this factory).

    Returns
    -------
    _Fitness
        The wrapped fitness object.

    Raises
    ------
    ValueError
        If `greater_is_better` or `wrap` is not a bool, if `function` does not
        take exactly 3 arguments, or if it does not return a number.
    """
    if not isinstance(greater_is_better, bool):
        raise ValueError('greater_is_better must be bool, got %s'
                         % type(greater_is_better))
    if not isinstance(wrap, bool):
        # Fixed grammar in the user-facing message ("an bool" -> "a bool").
        raise ValueError('wrap must be a bool, got %s' % type(wrap))
    # NOTE(review): __code__ introspection fails for builtins/partials —
    # callers must pass a plain Python function.
    if function.__code__.co_argcount != 3:
        raise ValueError('function requires 3 arguments (y, y_pred, w),'
                         ' got %d.' % function.__code__.co_argcount)
    # Smoke-test the metric on tiny arrays to confirm it returns a number.
    if not isinstance(function(np.array([1, 1]),
                               np.array([2, 2]),
                               np.array([1, 1])), numbers.Number):
        raise ValueError('function must return a numeric.')
    if wrap:
        # cloudpickle-based wrapper so custom metrics survive pickling
        # when evolution runs in parallel.
        function = wrap_non_picklable_objects(function)
    return _Fitness(function=function,
                    greater_is_better=greater_is_better,
                    stock_is=stock_is)
def _weighted_pearson(y, y_pred, w):
"""Calculate the weighted Pearson correlation coefficient."""
with np.errstate(divide='ignore', invalid='ignore'):
y_pred_demean = y_pred - np.average(y_pred, weights=w)
y_demean = y - np.average(y, weights=w)
corr = ((np.sum(w * y_pred_demean * y_demean) / np.sum(w)) /
np.sqrt((np.sum(w * y_pred_demean ** 2) *
np.sum(w * y_demean ** 2)) /
(np.sum(w) ** 2)))
if np.isfinite(corr):
return np.abs(corr)
return 0.
def _weighted_spearman(y, y_pred, w):
    """Return the weighted Spearman correlation: Pearson on ranked data."""
    ranked_pred = np.apply_along_axis(rankdata, 0, y_pred)
    ranked_true = np.apply_along_axis(rankdata, 0, y)
    # Argument order mirrors the original; Pearson is symmetric, so
    # (pred, true) vs (true, pred) gives the same result.
    return _weighted_pearson(ranked_pred, ranked_true, w)
def _mean_absolute_error(y, y_pred, w):
"""Calculate the mean absolute error."""
return np.average(np.abs(y_pred - y), weights=w)
def _mean_square_error(y, y_pred, w):
"""Calculate the mean square error."""
return np.average(((y_pred - y) ** 2), weights=w)
#为了不破坏整体的结构,以原结构进行比较
def _stock_dedicated(y,y_pred,w):
return np.average(y_pred,weights = w)
def _root_mean_square_error(y, y_pred, w):
"""Calculate the root mean square error."""
return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))
def _log_loss(y, y_pred, w):
"""Calculate the log loss."""
eps = 1e-15
inv_y_pred = np.clip(1 - y_pred, eps, 1 - eps)
y_pred = np.clip(y_pred, eps, 1 - eps)
score = y * np.log(y_pred) + (1 - y) * np.log(inv_y_pred)
return np.average(-score, weights=w)
# Pre-built fitness objects exposed by this module.
# Correlation metrics: higher is better.
weighted_pearson = _Fitness(function=_weighted_pearson,
                            greater_is_better=True)
weighted_spearman = _Fitness(function=_weighted_spearman,
                             greater_is_better=True)
# Error-magnitude metrics: lower is better.
mean_absolute_error = _Fitness(function=_mean_absolute_error,
                               greater_is_better=False)
mean_square_error = _Fitness(function=_mean_square_error,
                             greater_is_better=False)
root_mean_square_error = _Fitness(function=_root_mean_square_error,
                                  greater_is_better=False)
log_loss = _Fitness(function=_log_loss,
                    greater_is_better=False)
# stock_is tags this as the stock-specific metric (see _Fitness.__init__).
stock_dedicated = _Fitness(function=_stock_dedicated,
                           greater_is_better=True,
                           stock_is = True)
# String-name -> fitness-object lookup table.
# NOTE(review): presumably consumed by the estimators in gplearn.genetic
# to resolve a `metric` string parameter — confirm against the caller.
_fitness_map = {'pearson': weighted_pearson,
                'spearman': weighted_spearman,
                'mean absolute error': mean_absolute_error,
                'mse': mean_square_error,
                'rmse': root_mean_square_error,
                'log loss': log_loss,
                'stock_dedicated':stock_dedicated}