Sourcery refactored main branch #1

Open · wants to merge 1 commit into main
54 changes: 27 additions & 27 deletions packages/features_importance.py
@@ -5,58 +5,58 @@
import numpy as np
import matplotlib as mpl

def plot_explanations(W, S_list, nb_subgroups, c, att_names, patt_descriptions) :
def plot_explanations(W, S_list, nb_subgroups, c, att_names, patt_descriptions):

for j in range(0, nb_subgroups) :
for j in range(nb_subgroups):

print(j,'------------------------------------------------')
print(patt_descriptions[S_list[j]])
coefficients = W[S_list[j]].coef_
logic = coefficients > 0
coefficients_abs = np.abs(coefficients)
contributions = coefficients_abs / np.sum(coefficients_abs, axis = 1).reshape(-1,1)
features_importance = contributions[c] * 100
limit = 0.75

f_importance = features_importance[features_importance > limit]
f_importance = f_importance / np.sum(f_importance) * 100
f_importance = f_importance.round(2)
att_names_ = list(pd.Series(att_names[:362])[features_importance > limit])


f_importance_1 = f_importance[logic[c][features_importance > limit]]
att_names_1 = [x for i,x in enumerate (att_names_) if logic[c][features_importance > limit][i]]

f_importance_2 = f_importance[~logic[c][features_importance > limit]]
att_names_2 = [x for i,x in enumerate (att_names_) if not logic[c][features_importance > limit][i]]

plt.style.use('fivethirtyeight')
plt.figure(figsize =(3, 4))
plt.barh(att_names_2, f_importance_2,color='#e74c3c',height=0.65)
plt.barh(att_names_1, f_importance_1,color='#1abc9c',height=0.65)
all_f_importance = np.concatenate((f_importance_2,f_importance_1))
for i, v in enumerate(all_f_importance) :
plt.text(v + 0.4, i, str(v)+'%', fontsize = 9)
for i, v in enumerate(all_f_importance):
plt.text(v + 0.4, i, f'{str(v)}%', fontsize = 9)

plt.xlabel("Features Importance",fontsize=10)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.grid(True, linestyle='--', which='major',color='grey', alpha=0.75)
plt.savefig('FIGURES/f_'+str(j))
plt.savefig(f'FIGURES/f_{str(j)}')
Comment on lines -8 to +45
Function plot_explanations refactored with the following changes:

plt.show()
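
A side note on the computation in this function: the local model's coefficients are turned into per-class percentage contributions, features at or below the 0.75 threshold are dropped, and the remainder is renormalised to sum to 100%. A minimal sketch of that normalisation, with assumed array shapes (an illustration, not part of the diff):

    import numpy as np

    def contribution_percentages(coef, c, limit=0.75):
        # coef: (n_classes, n_features) coefficients of one local model
        abs_coef = np.abs(coef)
        # share of each feature within its class, as a percentage
        contributions = abs_coef / abs_coef.sum(axis=1, keepdims=True)
        importance = contributions[c] * 100
        kept = importance[importance > limit]      # drop near-zero features
        return (kept / kept.sum() * 100).round(2)  # renormalise to 100%
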

def sort_subgroups_support(S,K) :
S_copy = S.copy()
l_best_s = []
for i in range(0,K) :
inter = 0
s_best = None

for s in S_copy :
if len(s) > inter :
inter = len(s)
s_best = s
l_best_s.append(s_best)
S_copy.remove(s_best)
return l_best_s
def sort_subgroups_support(S,K):
S_copy = S.copy()
l_best_s = []
for _ in range(K):
inter = 0
s_best = None

for s in S_copy :
if len(s) > inter :
inter = len(s)
s_best = s
l_best_s.append(s_best)
S_copy.remove(s_best)

return l_best_s
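
For comparison, the selection loop above repeatedly picks the remaining subgroup with the largest support. A minimal sketch of an equivalent formulation using sorted() (an assumption about intent, not part of the diff), valid when S is a list of sized collections and K <= len(S):

    def sort_subgroups_support_sorted(S, K):
        # K subgroups with the largest support; sorted() is stable, so ties
        # keep their original order, matching the pick-first behaviour above.
        return sorted(S, key=len, reverse=True)[:K]
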
22 changes: 11 additions & 11 deletions packages/neighbors_generation.py
@@ -15,26 +15,26 @@ def cal_covn(data_num, num_size, n) :
return cov_matrix


def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model) :
def generate_all_neighbors(data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model):

list_neighs = []
num_size = numerical_cols.size
num_size_compressed = numerical_cols_compressed.size
n = np.size(data, 0)

n = np.size(data, 0)
covn = cal_covn(data, num_size, n_var)
covn_compressed = cal_covn(data_compressed, num_size_compressed, n_var)

base = np.zeros(data.shape[1])
neighbors_base = np.random.multivariate_normal(base, covn, n_neigh)

base_compressed = np.zeros(data_compressed.shape[1])
neighbors_base_compressed = np.random.multivariate_normal(base_compressed, covn_compressed, n_neigh)
for i in range(0,n) :

for i in range(n):
neighbors = neighbors_base + data[i]
neighbors_compressed = neighbors_base_compressed + data_compressed[i]

Comment on lines -18 to +37
Function generate_all_neighbors refactored with the following changes:

# for original neighbors
j = num_size
for l in categ_unique :
@@ -47,7 +47,7 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
neighbors[:,j][neighbors[:,j] <= 0] = 0
neighbors[:,j][neighbors[:,j] >= 1] = 1
j = j + 1

# for compressed neighbors
k = num_size_compressed
for l in categ_unique_compressed :
@@ -60,10 +60,10 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
neighbors_compressed[:,k][neighbors_compressed[:,k] <= 0] = 0
neighbors_compressed[:,k][neighbors_compressed[:,k] >= 1] = 1
k = k + 1

neighbors[neighbors < 0] = 0
neighbors_compressed [neighbors_compressed < 0] = 0
target = model.predict(neighbors)
list_neighs.append((neighbors_compressed, target))

return list_neighs
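
The loop above draws one shared block of Gaussian noise around the origin and translates it onto every instance. A minimal sketch of that idea for a single instance, with assumed shapes and omitting the categorical-column handling shown in the diff:

    import numpy as np

    def neighbors_around(x, cov, n_neigh, seed=0):
        # x: 1-D instance; cov: covariance matrix such as the one from cal_covn
        rng = np.random.default_rng(seed)
        noise = rng.multivariate_normal(np.zeros(x.shape[0]), cov, n_neigh)
        neighbors = noise + x              # shift the shared noise onto x
        neighbors[neighbors < 0] = 0       # clip negatives, as in the diff
        return neighbors
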
70 changes: 36 additions & 34 deletions packages/patterns_extraction.py
@@ -4,17 +4,14 @@

from neighbors_generation import *

def patterns (P, split_point1, split_point2, data, att_names_) :
def patterns(P, split_point1, split_point2, data, att_names_):


patt_dict = dict()
rank = 0
for s,p in P.items() :
patt_dict = {}
for rank, (s, p) in enumerate(P.items()):

description = ''
it = 0
d = dict ()
while (it < len(p)) :
d = {}
for it in range(0, len(p), 3):
Comment on lines -7 to +14
Function patterns refactored with the following changes:

a,op,v = p[it],p[it+1],p[it+2]
if a not in d :
d[a] = [np.min(data[:,a]) ,
@@ -28,35 +25,40 @@ def patterns (P, split_point1, split_point2, data, att_names_) :
# update the max
d[a][1] = min(v,d[a][1])

it += 3

print ('subrgoup',rank)


description = ''
for att, value in d.items():
if att < split_point1 :
if (
att >= split_point1
and att < split_point2
and value[1] == 0
or att >= split_point1
and att >= split_point2
and value[0] < 0.5
):
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'
elif (
att >= split_point1
and att < split_point2
or att >= split_point1
):
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

else:
print(round(value[0]*23,0),"<",att_names_[att],"<=",round(value[1]*23,0))
description += str(round(value[0]*23,0)) + ' < ' + att_names_[att] + ' <= ' + str(round(value[1]*23,0)) +' \n'

elif att < split_point2 :
if value[1] == 0 :
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'
else :
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

else :
if value [0] < 0.5 :
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'

else :
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

patt_dict[s] = description
description += (
f'{str(round(value[0]*23,0))} < '
+ att_names_[att]
+ ' <= '
+ str(round(value[1] * 23, 0))
+ ' \n'
)


patt_dict[s] = description
print("-------------------------------------------------------------------")
rank += 1

return patt_dict
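
A reading aid for the flattened condition in the refactored branch above: 'and' binds tighter than 'or', so, assuming split_point1 <= split_point2, the branching can be sketched with explicit parentheses as follows (a hypothetical helper, not part of the diff):

    def describe_bound(att, value, att_names_, split_point1, split_point2):
        # value = [lower, upper] bounds collected for attribute att
        if (split_point1 <= att < split_point2 and value[1] == 0) or \
           (att >= split_point2 and value[0] < 0.5):
            return att_names_[att] + ' = 0 \n'   # binary attribute fixed to 0
        elif att >= split_point1:
            return att_names_[att] + ' = 1 \n'   # remaining binary cases
        else:
            # att < split_point1: numerical attribute, shown as a value range
            return (f'{round(value[0] * 23, 0)} < ' + att_names_[att]
                    + f' <= {round(value[1] * 23, 0)} \n')
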

41 changes: 17 additions & 24 deletions packages/performances.py
@@ -15,21 +15,20 @@ def loss_sd (S,data_test,list_neigh,model, limit) :
loss += calc_loss(data_neigh_s, target_neigh_s_proba, limit)
return loss

def loss_global_wb (data_test,list_neigh,model, limit) :
def loss_global_wb(data_test,list_neigh,model, limit):

n = np.size(data_test,0)
data_neigh_O, target_neigh_O_proba = sampling_sb(data_test,np.arange(n),list_neigh,model)
global_loss = calc_loss(data_neigh_O, target_neigh_O_proba, limit)
return global_loss
return calc_loss(data_neigh_O, target_neigh_O_proba, limit)
Comment on lines -18 to +22
Function loss_global_wb refactored with the following changes:



def loss_local_models (n,list_neigh,model, limit) :
def loss_local_models(n,list_neigh,model, limit):

loss = 0
for i in range(0,n) :
for i in range(n):
data_neigh_i= list_neigh[i][0]
target_neigh_i_proba = list_neigh[i][1]
loss += calc_loss(data_neigh_i, target_neigh_i_proba, limit)
Comment on lines -26 to +31
Function loss_local_models refactored with the following changes:

return loss

def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :
@@ -43,10 +42,9 @@ def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :

return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
def fscore_sd(S,data_test,list_neigh,model,nb_classes):

iteration = 0
for s in S :
for iteration, s in enumerate(S):
Comment on lines -46 to +47
Function fscore_sd refactored with the following changes:

data_neigh_s, target_neigh_s_proba = sampling_sb(data_test,s,list_neigh,model)
lr = Ridge(alpha = 1)
model_lr = lr.fit(data_neigh_s,target_neigh_s_proba)
@@ -59,15 +57,12 @@ def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
b = np.concatenate((b,np.argsort(target_neigh_s_proba, axis=1)[:,-3:]))

iteration += 1

return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
def fscore_local_models(data_test,n,list_neigh,model,nb_classes):


iteration = 0
for i in range(0,n) :
for iteration, i in enumerate(range(n)):
Comment on lines -66 to +65
Function fscore_local_models refactored with the following changes:


data_neigh_i= list_neigh[i][0]
target_neigh_i_proba = list_neigh[i][1]
@@ -80,9 +75,7 @@ def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
else :
a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
b = np.concatenate((b,np.argsort(target_neigh_i_proba, axis=1)[:,-3:]))

iteration += 1


return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def unit_vector(vector):
@@ -96,7 +89,7 @@ def angle_between(v1, v2):
return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))


def similarity (W,nb_classes) :
def similarity(W,nb_classes):
Comment on lines -99 to +92
Function similarity refactored with the following changes:


l = []

@@ -106,26 +99,26 @@ def similarity (W,nb_classes) :


distance_matrix = np.zeros(len(l)**2).reshape(len(l),len(l))
for i in range (0,len(l)) :
for i in range(len(l)):
for j in range (i,len(l)):
for c in range (0,nb_classes) :
for c in range(nb_classes):
if c == 0 :
v1 = l[i][1].coef_[c]
v2 = l[j][1].coef_[c]
else :
v1 = np.concatenate((v1,l[i][1].coef_[c]),axis=0)
v2 = np.concatenate((v2,l[j][1].coef_[c]),axis=0)
distance_matrix[i,j] = round(math.cos(angle_between(v1,v2)),2)
distance_matrix[j,i] = distance_matrix[i,j]

return distance_matrix


def avg_non_similar (dist,treshold) :
def avg_non_similar(dist,treshold):

nb_non_sim = 0
nb_sbgrps = np.size(dist,0)
for i in range (0, nb_sbgrps) :
for i in range(nb_sbgrps):
Comment on lines -124 to +121
Function avg_non_similar refactored with the following changes:

for j in range (i+1, nb_sbgrps) :
if dist[i,j] <= treshold :
nb_non_sim += 1
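
A closing note on similarity() above: taking math.cos(angle_between(v1, v2)) is simply the cosine similarity of the two concatenated coefficient vectors. A minimal equivalent sketch (an assumption about intent, not part of the diff):

    import numpy as np

    def cosine_similarity(v1, v2):
        # same normalisation as unit_vector() in the original code
        v1_u = v1 / np.linalg.norm(v1)
        v2_u = v2 / np.linalg.norm(v2)
        # cos(arccos(x)) == x on [-1, 1], so this matches
        # math.cos(angle_between(v1, v2)) before the rounding step
        return float(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
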