Sourcery refactored main branch #1

base: main
@@ -15,26 +15,26 @@ def cal_covn(data_num, num_size, n) :
     return cov_matrix

-def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model) :
+def generate_all_neighbors(data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model):

     list_neighs = []
     num_size = numerical_cols.size
     num_size_compressed = numerical_cols_compressed.size
     n = np.size(data, 0)

     covn = cal_covn(data, num_size, n_var)
     covn_compressed = cal_covn(data_compressed, num_size_compressed, n_var)

     base = np.zeros(data.shape[1])
     neighbors_base = np.random.multivariate_normal(base, covn, n_neigh)

     base_compressed = np.zeros(data_compressed.shape[1])
     neighbors_base_compressed = np.random.multivariate_normal(base_compressed, covn_compressed, n_neigh)

-    for i in range(0,n) :
+    for i in range(n):
         neighbors = neighbors_base + data[i]
         neighbors_compressed = neighbors_base_compressed + data_compressed[i]
Sourcery comment on lines -18 to +37: Function `generate_all_neighbors` refactored.
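
A minimal, self-contained sketch of the sampling pattern this function uses, with hypothetical shapes and a stand-in covariance (not the project's `cal_covn`): one batch of zero-centred Gaussian offsets is drawn once and then re-centred on each instance, which is why the refactored loop only needs `for i in range(n):`.

    import numpy as np

    rng = np.random.default_rng(0)
    data = rng.random((5, 3))            # hypothetical: 5 instances, 3 features
    cov = np.diag([0.05, 0.05, 0.05])    # stand-in for the matrix cal_covn builds
    n_neigh = 4

    # Draw the neighbor offsets once, centred at zero.
    base = np.zeros(data.shape[1])
    neighbors_base = rng.multivariate_normal(base, cov, n_neigh)

    # Translate the same offsets onto each instance, as the loop body does.
    neighborhoods = [neighbors_base + data[i] for i in range(np.size(data, 0))]
    print(neighborhoods[0].shape)        # (4, 3): n_neigh rows per instance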
         # for original neighbors
         j = num_size
         for l in categ_unique :
@@ -47,7 +47,7 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
             neighbors[:,j][neighbors[:,j] <= 0] = 0
             neighbors[:,j][neighbors[:,j] >= 1] = 1
             j = j + 1

         # for compressed neighbors
         k = num_size_compressed
         for l in categ_unique_compressed :

@@ -60,10 +60,10 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
             neighbors_compressed[:,k][neighbors_compressed[:,k] <= 0] = 0
             neighbors_compressed[:,k][neighbors_compressed[:,k] >= 1] = 1
             k = k + 1

         neighbors[neighbors < 0] = 0
         neighbors_compressed [neighbors_compressed < 0] = 0
         target = model.predict(neighbors)
         list_neighs.append((neighbors_compressed, target))

     return list_neighs
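
The hunks above clamp each categorical column into [0, 1] and then zero out any remaining negative values. A small sketch of that numpy boolean-mask idiom on illustrative data:

    import numpy as np

    neighbors = np.array([[-0.2, 0.4, 1.3],
                          [ 0.7, -0.1, 0.9]])

    # Clamp one column into [0, 1], as the per-column loop does.
    j = 2
    neighbors[:, j][neighbors[:, j] <= 0] = 0
    neighbors[:, j][neighbors[:, j] >= 1] = 1

    # Zero out anything still negative, as the final pass does.
    neighbors[neighbors < 0] = 0
    print(neighbors)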
@@ -4,17 +4,14 @@

 from neighbors_generation import *

-def patterns (P, split_point1, split_point2, data, att_names_) :
+def patterns(P, split_point1, split_point2, data, att_names_):

-    patt_dict = dict()
-    rank = 0
-    for s,p in P.items() :
+    patt_dict = {}
+    for rank, (s, p) in enumerate(P.items()):

         description = ''
-        it = 0
-        d = dict ()
-        while (it < len(p)) :
+        d = {}
+        for it in range(0, len(p), 3):
Sourcery comment on lines -7 to +14: Function `patterns` refactored.
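
Two of the mechanical rewrites here are worth seeing side by side: the hand-advanced `while` over flat (attribute, operator, value) triples becomes a stepped `range`, and the manual `rank` counter becomes `enumerate`. A sketch with made-up data:

    # Flat list of (attribute, operator, value) triples, like `p` above.
    p = [0, '<=', 0.4, 2, '>', 0.1]

    # Before: manual index advanced inside a while loop.
    it = 0
    while it < len(p):
        a, op, v = p[it], p[it + 1], p[it + 2]
        it += 3

    # After: the stride of 3 lives in the loop header.
    for it in range(0, len(p), 3):
        a, op, v = p[it], p[it + 1], p[it + 2]

    # Likewise, enumerate replaces a hand-maintained rank counter.
    P = {'s1': p, 's2': p}
    for rank, (s, triples) in enumerate(P.items()):
        print(rank, s)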
             a,op,v = p[it],p[it+1],p[it+2]
             if a not in d :
                 d[a] = [np.min(data[:,a]) ,
@@ -28,35 +25,40 @@ def patterns (P, split_point1, split_point2, data, att_names_) :
             #update le max
             d[a][1] = min(v,d[a][1])

-            it += 3
-
         print ('subrgoup',rank)

         description = ''
         for att, value in d.items():
-            if att < split_point1 :
-                print(round(value[0]*23,0),"<",att_names_[att],"<=",round(value[1]*23,0))
-                description += str(round(value[0]*23,0)) + ' < ' + att_names_[att] + ' <= ' + str(round(value[1]*23,0)) +' \n'
-
-            elif att < split_point2 :
-                if value[1] == 0 :
-                    print(att_names_[att],"=",'0')
-                    description += att_names_[att] + ' = ' + '0' +' \n'
-                else :
-                    print(att_names_[att],"=",'1')
-                    description += att_names_[att] + ' = ' + '1' +' \n'
-
-            else :
-                if value [0] < 0.5 :
-                    print(att_names_[att],"=",'0')
-                    description += att_names_[att] + ' = ' + '0' +' \n'
-
-                else :
-                    print(att_names_[att],"=",'1')
-                    description += att_names_[att] + ' = ' + '1' +' \n'
+            if (
+                att >= split_point1
+                and att < split_point2
+                and value[1] == 0
+                or att >= split_point1
+                and att >= split_point2
+                and value[0] < 0.5
+            ):
+                print(att_names_[att],"=",'0')
+                description += att_names_[att] + ' = ' + '0' +' \n'
+            elif (
+                att >= split_point1
+                and att < split_point2
+                or att >= split_point1
+            ):
+                print(att_names_[att],"=",'1')
+                description += att_names_[att] + ' = ' + '1' +' \n'
+            else:
+                print(round(value[0]*23,0),"<",att_names_[att],"<=",round(value[1]*23,0))
+                description += (
+                    f'{str(round(value[0]*23,0))} < '
+                    + att_names_[att]
+                    + ' <= '
+                    + str(round(value[1] * 23, 0))
+                    + ' \n'
+                )

         patt_dict[s] = description
         print("-------------------------------------------------------------------")
-        rank += 1

     return patt_dict
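
The rewritten description builder above still mixes an f-string with `+` concatenation; the fully f-string form that the use-fstring-for-concatenation rule aims at looks like this (illustrative values):

    att_name = 'hour'      # hypothetical attribute name
    value = [0.25, 0.75]   # hypothetical normalised bounds

    # Concatenation, as in the original code.
    desc = str(round(value[0] * 23, 0)) + ' < ' + att_name + ' <= ' + str(round(value[1] * 23, 0)) + ' \n'

    # The same string as a single f-string.
    desc_f = f'{round(value[0] * 23, 0)} < {att_name} <= {round(value[1] * 23, 0)} \n'

    assert desc == desc_f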
@@ -15,21 +15,20 @@ def loss_sd (S,data_test,list_neigh,model, limit) :
         loss += calc_loss(data_neigh_s, target_neigh_s_proba, limit)
     return loss

-def loss_global_wb (data_test,list_neigh,model, limit) :
+def loss_global_wb(data_test,list_neigh,model, limit):

     n = np.size(data_test,0)
     data_neigh_O, target_neigh_O_proba = sampling_sb(data_test,np.arange(n),list_neigh,model)
-    global_loss = calc_loss(data_neigh_O, target_neigh_O_proba, limit)
-    return global_loss
+    return calc_loss(data_neigh_O, target_neigh_O_proba, limit)
Sourcery comment on lines -18 to +22: Function `loss_global_wb` refactored.
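
The `loss_global_wb` change is the standard inline-immediately-returned-variable rewrite: a local assigned on one line and returned on the next is folded into the `return`. A trivial sketch:

    def mean_before(xs):
        result = sum(xs) / len(xs)  # assigned ...
        return result               # ... and immediately returned

    def mean_after(xs):
        return sum(xs) / len(xs)    # the intermediate name is inlined

    assert mean_before([1, 2, 3]) == mean_after([1, 2, 3])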
-def loss_local_models (n,list_neigh,model, limit) :
+def loss_local_models(n,list_neigh,model, limit):

     loss = 0
-    for i in range(0,n) :
+    for i in range(n):
         data_neigh_i= list_neigh[i][0]
         target_neigh_i_proba = list_neigh[i][1]
         loss += calc_loss(data_neigh_i, target_neigh_i_proba, limit)
Sourcery comment on lines -26 to +31: Function `loss_local_models` refactored.
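
Beyond dropping the zero from `range`, the accumulation in `loss_local_models` could equally be a `sum` over a generator; a hedged alternative sketch with a stand-in `calc_loss`, not a change this PR makes:

    def calc_loss(data_neigh, target_proba, limit):
        # Hypothetical stand-in for the project's calc_loss.
        return sum(target_proba) * limit

    list_neigh = [([0.1, 0.2], [0.9]), ([0.3, 0.4], [0.8])]

    # Loop form, as in the refactored code.
    loss = 0
    for i in range(len(list_neigh)):
        loss += calc_loss(list_neigh[i][0], list_neigh[i][1], limit=1.0)

    # Equivalent generator form.
    assert loss == sum(calc_loss(d, t, limit=1.0) for d, t in list_neigh)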
     return loss

 def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :
@@ -43,10 +42,9 @@ def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :

     return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

-def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
+def fscore_sd(S,data_test,list_neigh,model,nb_classes):

-    iteration = 0
-    for s in S :
+    for iteration, s in enumerate(S):
Sourcery comment on lines -46 to +47: Function `fscore_sd` refactored.
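
The fscore helpers fit a Ridge surrogate on neighborhood probabilities and compare the top-ranked classes via `argsort`. A self-contained sketch of that evaluation pattern on synthetic data (all shapes and values illustrative):

    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.metrics import f1_score

    rng = np.random.default_rng(0)
    X = rng.random((50, 4))                     # neighborhood points
    proba = rng.dirichlet(np.ones(5), size=50)  # black-box class probabilities

    # Local linear surrogate, as in the diff (Ridge(alpha=1)).
    surrogate = Ridge(alpha=1).fit(X, proba)
    target_lr = surrogate.predict(X)

    # Top-3 classes per row for surrogate and black box.
    a = np.argsort(target_lr, axis=1)[:, -3:]
    b = np.argsort(proba, axis=1)[:, -3:]

    # Weighted F1 on the top-1 column, mirroring the returned tuple.
    print(f1_score(a[:, 2], b[:, 2], average='weighted'))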
         data_neigh_s, target_neigh_s_proba = sampling_sb(data_test,s,list_neigh,model)
         lr = Ridge(alpha = 1)
         model_lr = lr.fit(data_neigh_s,target_neigh_s_proba)
@@ -59,15 +57,12 @@ def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
             a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
             b = np.concatenate((b,np.argsort(target_neigh_s_proba, axis=1)[:,-3:]))

-        iteration += 1
-
     return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

-def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
+def fscore_local_models(data_test,n,list_neigh,model,nb_classes):

-    iteration = 0
-    for i in range(0,n) :
+    for iteration, i in enumerate(range(n)):
Sourcery comment on lines -66 to +65: Function `fscore_local_models` refactored.
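
One quirk of this rewrite: since the iterable is already `range(n)`, `enumerate` pairs each index with itself, so `iteration` and `i` are always equal:

    for iteration, i in enumerate(range(3)):
        assert iteration == i
        print(iteration, i)   # 0 0, 1 1, 2 2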
         data_neigh_i= list_neigh[i][0]
         target_neigh_i_proba = list_neigh[i][1]
@@ -80,9 +75,7 @@ def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
         else :
             a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
             b = np.concatenate((b,np.argsort(target_neigh_i_proba, axis=1)[:,-3:]))

-        iteration += 1

     return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

 def unit_vector(vector):

@@ -96,7 +89,7 @@ def angle_between(v1, v2):
     return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))


-def similarity (W,nb_classes) :
+def similarity(W,nb_classes):
Sourcery comment on lines -99 to +92: Function `similarity` refactored.
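
`similarity` stacks the per-class coefficient rows of two local models into single vectors and takes the cosine of the angle between them, using the `unit_vector` and `angle_between` helpers above. A compact sketch with synthetic coefficient matrices:

    import math
    import numpy as np

    def unit_vector(vector):
        return vector / np.linalg.norm(vector)

    def angle_between(v1, v2):
        v1_u, v2_u = unit_vector(v1), unit_vector(v2)
        return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

    # Hypothetical coef_ matrices, shape (nb_classes, n_features).
    coef_a = np.array([[0.5, 1.0], [0.2, -0.3]])
    coef_b = np.array([[0.4, 1.1], [0.1, -0.2]])

    # Flatten the per-class rows into one vector each, as the c-loop does.
    v1, v2 = coef_a.ravel(), coef_b.ravel()
    print(round(math.cos(angle_between(v1, v2)), 2))  # near 1.0 for similar models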
     l = []
@@ -106,26 +99,26 @@ def similarity (W,nb_classes) :

     distance_matrix = np.zeros(len(l)**2).reshape(len(l),len(l))
-    for i in range (0,len(l)) :
+    for i in range(len(l)):
         for j in range (i,len(l)):
-            for c in range (0,nb_classes) :
+            for c in range(nb_classes):
                 if c == 0 :
                     v1 = l[i][1].coef_[c]
                     v2 = l[j][1].coef_[c]
                 else :
                     v1 = np.concatenate((v1,l[i][1].coef_[c]),axis=0)
                     v2 = np.concatenate((v2,l[j][1].coef_[c]),axis=0)
             distance_matrix[i,j] = round(math.cos(angle_between(v1,v2)),2)
             distance_matrix[j,i] = distance_matrix[i,j]

     return distance_matrix

-def avg_non_similar (dist,treshold) :
+def avg_non_similar(dist,treshold):

     nb_non_sim = 0
     nb_sbgrps = np.size(dist,0)
-    for i in range (0, nb_sbgrps) :
+    for i in range(nb_sbgrps):
Sourcery comment on lines -124 to +121: Function `avg_non_similar` refactored.
         for j in range (i+1, nb_sbgrps) :
             if dist[i,j] <= treshold :
                 nb_non_sim += 1
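
`avg_non_similar` walks the strict upper triangle of the similarity matrix and counts pairs at or below the threshold. An equivalent vectorised form with `np.triu_indices`, shown only as an alternative sketch:

    import numpy as np

    dist = np.array([[1.0, 0.2, 0.9],
                     [0.2, 1.0, 0.1],
                     [0.9, 0.1, 1.0]])
    treshold = 0.3  # keeping the diff's spelling

    # Nested-loop count over the strict upper triangle, as in the diff.
    nb_non_sim = 0
    nb_sbgrps = np.size(dist, 0)
    for i in range(nb_sbgrps):
        for j in range(i + 1, nb_sbgrps):
            if dist[i, j] <= treshold:
                nb_non_sim += 1

    # Vectorised equivalent.
    iu = np.triu_indices(nb_sbgrps, k=1)
    assert nb_non_sim == int(np.sum(dist[iu] <= treshold))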
Sourcery comment: Function `plot_explanations` refactored with the following changes:

- remove-zero-from-range
- use-fstring-for-concatenation
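
Both named rules are small, behavior-preserving rewrites; a two-line sketch of each:

    # remove-zero-from-range: an explicit zero start is redundant.
    assert list(range(0, 5)) == list(range(5))

    # use-fstring-for-concatenation: build the string in one f-string.
    name, count = 'subgroups', 4
    assert 'found ' + str(count) + ' ' + name == f'found {count} {name}'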