"""TODO."""
from __future__ import print_function, absolute_import

import random

from numba import jit

from numbskull_extend.inference import draw_sample, eval_factor

@jit(cache=True, nogil=True)
def learnthread(shardID, nshards, step, regularization, reg_param, truncation,
                var_copy, weight_copy, weight,
                variable, factor, fmap, alpha_bound, tau_bound, sample_list,
                vmap, factor_index, Z, fids, var_value, var_value_evid,
                weight_value, learn_non_evidence, poential_weight):
    """Run one SGD sweep over this shard's sample list, updating weights in place."""
    # Identify the start and end variable (shard bounds over the variable
    # array); the loop below is driven by sample_list.
    nvar = variable.shape[0]
    start = (shardID * nvar) // nshards
    end = ((shardID + 1) * nvar) // nshards
    sample_num = sample_list.shape[0]
    for i in range(sample_num):
        var_samp = sample_list[i]['vid']
        if variable[var_samp]["isEvidence"] == 4:
            # This variable is not owned by this machine
            continue
        sample_and_sgd(var_samp, step, regularization, reg_param, truncation,
                       var_copy, weight_copy, weight, variable,
                       factor, fmap, alpha_bound, tau_bound, vmap,
                       factor_index, Z[shardID], fids[shardID], var_value,
                       var_value_evid, weight_value, learn_non_evidence,
                       poential_weight)
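
# Worked example of the shard bounds above (illustrative, not from the
# source): with nvar = 10 and nshards = 3, shard 0 covers variables [0, 3),
# shard 1 covers [3, 6), and shard 2 covers [6, 10); the integer divisions
# keep the shards contiguous and non-overlapping.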

@jit(nopython=True, cache=True, nogil=True)
def get_factor_id_range(variable, vmap, var_samp, val):
    """Return the [start, end) range in factor_index for var_samp taking value val."""
    varval_off = val
    if variable[var_samp]["dataType"] == 0:
        varval_off = 0
    vtf = vmap[variable[var_samp]["vtf_offset"] + varval_off]
    start = vtf["factor_index_offset"]
    end = start + vtf["factor_index_length"]
    return (start, end)
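
# Indexing note (an interpretation, assuming numbskull's conventions): vmap
# holds one (variable, value) -> factors record per possible value. A
# variable with vtf_offset = 7 and val = 2 would read vmap[9]; variables
# with dataType == 0 collapse every value onto offset 0, so a single record
# lists all factors that touch them.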

@jit(nopython=True, cache=True, nogil=True)
def sample_and_sgd(var_samp, step, regularization, reg_param, truncation,
                   var_copy, weight_copy, weight, variable, factor, fmap,
                   alpha_bound, tau_bound, vmap, factor_index, Z, fids,
                   var_value, var_value_evid, weight_value,
                   learn_non_evidence, poential_weight):
    """Sample one variable and take an SGD step on the weights of its factors."""
    # SGD approximates the full-data gradient with a single sample: each
    # step is cheap, and the iterates settle near the optimum.
    # var_samp is a variable index; truncation controls the truncated-
    # gradient (L1) schedule.
    # If learn_non_evidence, sample twice.
    # The method corresponds to expectation-conjugate descent.
    if variable[var_samp]["isEvidence"] != 1:
        # Non-evidence variables must be sampled.
        evidence = draw_sample(var_samp, var_copy, weight_copy,
                               weight, variable, factor,
                               fmap, vmap, factor_index, Z,
                               var_value_evid, weight_value)
    # If evidence then store the initial value in a tmp variable
    # then sample and compute the gradient.
    else:
        # Evidence variables use their observed initial value directly.
        evidence = variable[var_samp]["initialValue"]
    # var_value_evid holds the assignment used for the evidence (data)
    # side of the gradient.
    var_value_evid[var_copy][var_samp] = evidence
    # Sample the variable
    proposal = draw_sample(var_samp, var_copy, weight_copy, weight,
                           variable, factor, fmap, vmap,
                           factor_index, Z, var_value, weight_value)
    var_value[var_copy][var_samp] = proposal
    if not learn_non_evidence and variable[var_samp]["isEvidence"] != 1:
        return
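    # Gradient sketch (informal reading of the code below): for each factor
    # touching var_samp, the stochastic gradient is proportional to
    # (p1 - p0), where p0 evaluates the factor at the evidence value and p1
    # at the freshly sampled proposal; evidence acts as the data term and
    # the proposal as a one-sample estimate of the model expectation.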
    # Compute the gradient and update the weights.
    # Iterate over corresponding factors; get_factor_id_range returns the
    # range of factor ids for a given (variable, value) pair.
    range_fids = get_factor_id_range(variable, vmap, var_samp, evidence)
    # TODO: is it possible to avoid copying around fids
    if evidence != proposal:
        range_prop = get_factor_id_range(variable, vmap, var_samp, proposal)
        s1 = range_fids[1] - range_fids[0]
        s2 = range_prop[1] - range_prop[0]
        s = s1 + s2
        fids[:s1] = factor_index[range_fids[0]:range_fids[1]]
        fids[s1:s] = factor_index[range_prop[0]:range_prop[1]]
        fids[:s].sort()
    else:
        s = range_fids[1] - range_fids[0]
        fids[:s] = factor_index[range_fids[0]:range_fids[1]]
    # random.random() returns a float in [0, 1).
    truncate = random.random() < 1.0 / truncation if regularization == 1 else False
    # Go over all factor ids, skipping duplicates.
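    # Note on the truncation schedule (per Langford et al., 2009): with
    # probability 1 / truncation a shrinkage of
    #     l1delta = reg_param * step * truncation
    # is applied below, so the expected L1 penalty per step is
    # reg_param * step, matching an every-step L1 update at lower cost.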
    last_fid = -1  # numba 0.28 would complain if this were None
    for factor_id in fids[:s]:
        if factor_id == last_fid:
            continue
        last_fid = factor_id
        weight_id = factor[factor_id]["weightId"]
        if weight[weight_id]["isFixed"]:
            # Fixed weights are never updated.
            continue
        # Compute gradient.
        p0 = eval_factor(factor_id, var_samp,
                         evidence, var_copy,
                         variable, factor, fmap,
                         var_value_evid)
        p1 = eval_factor(factor_id, var_samp,
                         proposal, var_copy,
                         variable, factor, fmap,
                         var_value)
        # Parameterized weight: w = theta * a * (x - b).
        if weight[factor[factor_id]['weightId']]['parameterize']:
            x = fmap[factor[factor_id]["ftv_offset"]]['x']  # look up x via the ftv offset
            theta = fmap[factor[factor_id]["ftv_offset"]]['theta']
            a = weight[factor[factor_id]['weightId']]['a']
            b = weight[factor[factor_id]['weightId']]['b']
            gradient1 = (p1 - p0) * theta * factor[factor_id]["featureValue"] * (x - b)
            gradient2 = (p1 - p0) * theta * factor[factor_id]["featureValue"] * (-a)
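            # The two gradients above follow from the chain rule on
            # w = theta * a * (x - b): dw/da = theta * (x - b) and
            # dw/db = -theta * a, each scaled by the stochastic factor
            # gradient (p1 - p0) * featureValue.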
            if regularization == 2:  # L2 regularization
                a *= (1.0 / (1.0 + reg_param * step))
                a -= step * gradient1
                b *= (1.0 / (1.0 + reg_param * step))
                b -= step * gradient2
            elif regularization == 1:
                # Truncated gradient:
                # "Sparse Online Learning via Truncated Gradient",
                # Langford et al. 2009
                a -= step * gradient1
                b -= step * gradient2
                if truncate:
                    l1delta = reg_param * step * truncation
                    a = max(0, a - l1delta) if a > 0 else min(0, a + l1delta)
                    b = max(0, b - l1delta) if b > 0 else min(0, b + l1delta)
            else:
                a -= step * gradient1
                b -= step * gradient2
            # Clamp a and b to their per-weight bounds.
            if a < tau_bound[factor[factor_id]['weightId']]['lowerBound']:
                a = tau_bound[factor[factor_id]['weightId']]['lowerBound']
            elif a > tau_bound[factor[factor_id]['weightId']]['upperBound']:
                a = tau_bound[factor[factor_id]['weightId']]['upperBound']
            if b > alpha_bound[factor[factor_id]['weightId']]['upperBound']:
                b = alpha_bound[factor[factor_id]['weightId']]['upperBound']
            elif b < alpha_bound[factor[factor_id]['weightId']]['lowerBound']:
                b = alpha_bound[factor[factor_id]['weightId']]['lowerBound']
            w = theta * a * (x - b)
            weight[factor[factor_id]['weightId']]['a'] = a
            weight[factor[factor_id]['weightId']]['b'] = b
        else:
            # Non-parameterized weight: plain SGD on w.
            gradient = (p1 - p0) * factor[factor_id]["featureValue"]
            # Update weight.
            w = weight_value[weight_copy][weight_id]
            if regularization == 2:
                w *= (1.0 / (1.0 + reg_param * step))
                w -= step * gradient
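                # The factor 1 / (1 + reg_param * step) above can be read as
                # the closed-form (proximal) step for the L2 penalty
                # 0.5 * reg_param * w**2, applied before the gradient step.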
            elif regularization == 1:
                # Truncated gradient:
                # "Sparse Online Learning via Truncated Gradient",
                # Langford et al. 2009
                w -= step * gradient
                if truncate:
                    l1delta = reg_param * step * truncation
                    w = max(0, w - l1delta) if w > 0 else min(0, w + l1delta)
            else:
                w -= step * gradient
        weight_value[weight_copy][weight_id] = w
        weight[factor[factor_id]['weightId']]['initialValue'] = w
        if variable[var_samp]["isEvidence"] != 1:
            poential_weight[factor[factor_id]['weightId']] = w
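
# Summary of the update rules implemented above (notation: fv is the
# factor's featureValue, p0/p1 the factor evaluated at the evidence and
# proposed values):
#
#   non-parameterized:  w <- w - step * (p1 - p0) * fv
#   parameterized:      w = theta * a * (x - b), with
#                       a <- a - step * (p1 - p0) * theta * fv * (x - b)
#                       b <- b - step * (p1 - p0) * theta * fv * (-a)
#
# plus the optional L2 (regularization == 2) or truncated-gradient L1
# (regularization == 1) terms applied in the corresponding branches.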