Sourcery refactored main branch #1

Open · wants to merge 1 commit into main
54 changes: 27 additions & 27 deletions packages/features_importance.py
@@ -5,58 +5,58 @@
import numpy as np
import matplotlib as mpl

def plot_explanations(W, S_list, nb_subgroups, c, att_names, patt_descriptions) :
def plot_explanations(W, S_list, nb_subgroups, c, att_names, patt_descriptions):

for j in range(0, nb_subgroups) :
for j in range(nb_subgroups):

print(j,'------------------------------------------------')
print(patt_descriptions[S_list[j]])
coefficients = W[S_list[j]].coef_
logic = coefficients > 0
coefficients_abs = np.abs(coefficients)
contributions = coefficients_abs / np.sum(coefficients_abs, axis = 1).reshape(-1,1)
features_importance = contributions[c] * 100
limit = 0.75

f_importance = features_importance[features_importance > limit]
f_importance = f_importance / np.sum(f_importance) * 100
f_importance = f_importance.round(2)
att_names_ = list(pd.Series(att_names[:362])[features_importance > limit])


f_importance_1 = f_importance[logic[c][features_importance > limit]]
att_names_1 = [x for i,x in enumerate (att_names_) if logic[c][features_importance > limit][i]]

f_importance_2 = f_importance[~logic[c][features_importance > limit]]
att_names_2 = [x for i,x in enumerate (att_names_) if not logic[c][features_importance > limit][i]]

plt.style.use('fivethirtyeight')
plt.figure(figsize =(3, 4))
plt.barh(att_names_2, f_importance_2,color='#e74c3c',height=0.65)
plt.barh(att_names_1, f_importance_1,color='#1abc9c',height=0.65)
all_f_importance = np.concatenate((f_importance_2,f_importance_1))
for i, v in enumerate(all_f_importance) :
plt.text(v + 0.4, i, str(v)+'%', fontsize = 9)
for i, v in enumerate(all_f_importance):
plt.text(v + 0.4, i, f'{str(v)}%', fontsize = 9)

plt.xlabel("Features Importance",fontsize=10)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.grid(True, linestyle='--', which='major',color='grey', alpha=0.75)
plt.savefig('FIGURES/f_'+str(j))
plt.savefig(f'FIGURES/f_{str(j)}')
Comment on lines -8 to +45
Function plot_explanations refactored with the following changes:

plt.show()
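
A side note on the computation in this function: the local model's coefficients are turned into per-class percentage contributions, features at or below the 0.75 threshold are dropped, and the remainder is renormalised to sum to 100%. A minimal sketch of that normalisation, with assumed array shapes (an illustration, not part of the diff):

    import numpy as np

    def contribution_percentages(coef, c, limit=0.75):
        # coef: (n_classes, n_features) coefficients of one local model
        abs_coef = np.abs(coef)
        # share of each feature within its class, as a percentage
        contributions = abs_coef / abs_coef.sum(axis=1, keepdims=True)
        importance = contributions[c] * 100
        kept = importance[importance > limit]      # drop near-zero features
        return (kept / kept.sum() * 100).round(2)  # renormalise to 100%
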

def sort_subgroups_support(S,K) :
S_copy = S.copy()
l_best_s = []
for i in range(0,K) :
inter = 0
s_best = None

for s in S_copy :
if len(s) > inter :
inter = len(s)
s_best = s
l_best_s.append(s_best)
S_copy.remove(s_best)
return l_best_s
def sort_subgroups_support(S,K):
S_copy = S.copy()
l_best_s = []
for _ in range(K):
inter = 0
s_best = None

for s in S_copy :
if len(s) > inter :
inter = len(s)
s_best = s
l_best_s.append(s_best)
S_copy.remove(s_best)

return l_best_s
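
For comparison, the selection loop above repeatedly picks the remaining subgroup with the largest support. A minimal sketch of an equivalent formulation using sorted() (an assumption about intent, not part of the diff), valid when S is a list of sized collections and K <= len(S):

    def sort_subgroups_support_sorted(S, K):
        # K subgroups with the largest support; sorted() is stable, so ties
        # keep their original order, matching the pick-first behaviour above.
        return sorted(S, key=len, reverse=True)[:K]
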
22 changes: 11 additions & 11 deletions packages/neighbors_generation.py
@@ -15,26 +15,26 @@ def cal_covn(data_num, num_size, n) :
return cov_matrix


def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model) :
def generate_all_neighbors(data, data_compressed, n_neigh, numerical_cols, numerical_cols_compressed, categ_unique, categ_unique_compressed,n_var, model):

list_neighs = []
num_size = numerical_cols.size
num_size_compressed = numerical_cols_compressed.size
n = np.size(data, 0)

n = np.size(data, 0)
covn = cal_covn(data, num_size, n_var)
covn_compressed = cal_covn(data_compressed, num_size_compressed, n_var)

base = np.zeros(data.shape[1])
neighbors_base = np.random.multivariate_normal(base, covn, n_neigh)

base_compressed = np.zeros(data_compressed.shape[1])
neighbors_base_compressed = np.random.multivariate_normal(base_compressed, covn_compressed, n_neigh)
for i in range(0,n) :

for i in range(n):
neighbors = neighbors_base + data[i]
neighbors_compressed = neighbors_base_compressed + data_compressed[i]

Comment on lines -18 to +37
Function generate_all_neighbors refactored with the following changes:

# for original neighbors
j = num_size
for l in categ_unique :
@@ -47,7 +47,7 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
neighbors[:,j][neighbors[:,j] <= 0] = 0
neighbors[:,j][neighbors[:,j] >= 1] = 1
j = j + 1

# for compressed neighbors
k = num_size_compressed
for l in categ_unique_compressed :
@@ -60,10 +60,10 @@ def generate_all_neighbors (data, data_compressed, n_neigh, numerical_cols, nume
neighbors_compressed[:,k][neighbors_compressed[:,k] <= 0] = 0
neighbors_compressed[:,k][neighbors_compressed[:,k] >= 1] = 1
k = k + 1

neighbors[neighbors < 0] = 0
neighbors_compressed [neighbors_compressed < 0] = 0
target = model.predict(neighbors)
list_neighs.append((neighbors_compressed, target))

return list_neighs
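
The loop above draws one shared block of Gaussian noise around the origin and translates it onto every instance. A minimal sketch of that idea for a single instance, with assumed shapes and omitting the categorical-column handling shown in the diff:

    import numpy as np

    def neighbors_around(x, cov, n_neigh, seed=0):
        # x: 1-D instance; cov: covariance matrix such as the one from cal_covn
        rng = np.random.default_rng(seed)
        noise = rng.multivariate_normal(np.zeros(x.shape[0]), cov, n_neigh)
        neighbors = noise + x              # shift the shared noise onto x
        neighbors[neighbors < 0] = 0       # clip negatives, as in the diff
        return neighbors
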
70 changes: 36 additions & 34 deletions packages/patterns_extraction.py
@@ -4,17 +4,14 @@

from neighbors_generation import *

def patterns (P, split_point1, split_point2, data, att_names_) :
def patterns(P, split_point1, split_point2, data, att_names_):


patt_dict = dict()
rank = 0
for s,p in P.items() :
patt_dict = {}
for rank, (s, p) in enumerate(P.items()):

description = ''
it = 0
d = dict ()
while (it < len(p)) :
d = {}
for it in range(0, len(p), 3):
Comment on lines -7 to +14
Function patterns refactored with the following changes:

a,op,v = p[it],p[it+1],p[it+2]
if a not in d :
d[a] = [np.min(data[:,a]) ,
@@ -28,35 +25,40 @@ def patterns (P, split_point1, split_point2, data, att_names_) :
# update the max
d[a][1] = min(v,d[a][1])

it += 3

print ('subrgoup',rank)


description = ''
for att, value in d.items():
if att < split_point1 :
if (
att >= split_point1
and att < split_point2
and value[1] == 0
or att >= split_point1
and att >= split_point2
and value[0] < 0.5
):
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'
elif (
att >= split_point1
and att < split_point2
or att >= split_point1
):
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

else:
print(round(value[0]*23,0),"<",att_names_[att],"<=",round(value[1]*23,0))
description += str(round(value[0]*23,0)) + ' < ' + att_names_[att] + ' <= ' + str(round(value[1]*23,0)) +' \n'

elif att < split_point2 :
if value[1] == 0 :
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'
else :
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

else :
if value [0] < 0.5 :
print(att_names_[att],"=",'0')
description += att_names_[att] + ' = ' + '0' +' \n'

else :
print(att_names_[att],"=",'1')
description += att_names_[att] + ' = ' + '1' +' \n'

patt_dict[s] = description
description += (
f'{str(round(value[0]*23,0))} < '
+ att_names_[att]
+ ' <= '
+ str(round(value[1] * 23, 0))
+ ' \n'
)


patt_dict[s] = description
print("-------------------------------------------------------------------")
rank += 1

return patt_dict
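
A reading aid for the flattened condition in the refactored branch above: 'and' binds tighter than 'or', so, assuming split_point1 <= split_point2, the branching can be sketched with explicit parentheses as follows (a hypothetical helper, not part of the diff):

    def describe_bound(att, value, att_names_, split_point1, split_point2):
        # value = [lower, upper] bounds collected for attribute att
        if (split_point1 <= att < split_point2 and value[1] == 0) or \
           (att >= split_point2 and value[0] < 0.5):
            return att_names_[att] + ' = 0 \n'   # binary attribute fixed to 0
        elif att >= split_point1:
            return att_names_[att] + ' = 1 \n'   # remaining binary cases
        else:
            # att < split_point1: numerical attribute, shown as a value range
            return (f'{round(value[0] * 23, 0)} < ' + att_names_[att]
                    + f' <= {round(value[1] * 23, 0)} \n')
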

41 changes: 17 additions & 24 deletions packages/performances.py
@@ -15,21 +15,20 @@ def loss_sd (S,data_test,list_neigh,model, limit) :
loss += calc_loss(data_neigh_s, target_neigh_s_proba, limit)
return loss

def loss_global_wb (data_test,list_neigh,model, limit) :
def loss_global_wb(data_test,list_neigh,model, limit):

n = np.size(data_test,0)
data_neigh_O, target_neigh_O_proba = sampling_sb(data_test,np.arange(n),list_neigh,model)
global_loss = calc_loss(data_neigh_O, target_neigh_O_proba, limit)
return global_loss
return calc_loss(data_neigh_O, target_neigh_O_proba, limit)
Comment on lines -18 to +22
Function loss_global_wb refactored with the following changes:



def loss_local_models (n,list_neigh,model, limit) :
def loss_local_models(n,list_neigh,model, limit):

loss = 0
for i in range(0,n) :
for i in range(n):
data_neigh_i= list_neigh[i][0]
target_neigh_i_proba = list_neigh[i][1]
loss += calc_loss(data_neigh_i, target_neigh_i_proba, limit)
Comment on lines -26 to +31
Function loss_local_models refactored with the following changes:

return loss

def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :
@@ -43,10 +42,9 @@ def fscore_global_wb (data_test,n,list_neigh,model,nb_classes) :

return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
def fscore_sd(S,data_test,list_neigh,model,nb_classes):

iteration = 0
for s in S :
for iteration, s in enumerate(S):
Comment on lines -46 to +47
Function fscore_sd refactored with the following changes:

data_neigh_s, target_neigh_s_proba = sampling_sb(data_test,s,list_neigh,model)
lr = Ridge(alpha = 1)
model_lr = lr.fit(data_neigh_s,target_neigh_s_proba)
@@ -59,15 +57,12 @@ def fscore_sd (S,data_test,list_neigh,model,nb_classes) :
a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
b = np.concatenate((b,np.argsort(target_neigh_s_proba, axis=1)[:,-3:]))

iteration += 1

return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
def fscore_local_models(data_test,n,list_neigh,model,nb_classes):


iteration = 0
for i in range(0,n) :
for iteration, i in enumerate(range(n)):
Comment on lines -66 to +65
Function fscore_local_models refactored with the following changes:


data_neigh_i= list_neigh[i][0]
target_neigh_i_proba = list_neigh[i][1]
@@ -80,9 +75,7 @@ def fscore_local_models (data_test,n,list_neigh,model,nb_classes) :
else :
a = np.concatenate((a,np.argsort(target_lr, axis=1)[:,-3:]))
b = np.concatenate((b,np.argsort(target_neigh_i_proba, axis=1)[:,-3:]))

iteration += 1


return (f1_score(a[:,2],b[:,2],average='weighted'), f1_score(a[:,1],b[:,1],average='weighted'), f1_score(a[:,0],b[:,0],average='weighted'))

def unit_vector(vector):
@@ -96,7 +89,7 @@ def angle_between(v1, v2):
return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))


def similarity (W,nb_classes) :
def similarity(W,nb_classes):
Comment on lines -99 to +92
Function similarity refactored with the following changes:


l = []

@@ -106,26 +99,26 @@ def similarity (W,nb_classes) :


distance_matrix = np.zeros(len(l)**2).reshape(len(l),len(l))
for i in range (0,len(l)) :
for i in range(len(l)):
for j in range (i,len(l)):
for c in range (0,nb_classes) :
for c in range(nb_classes):
if c == 0 :
v1 = l[i][1].coef_[c]
v2 = l[j][1].coef_[c]
else :
v1 = np.concatenate((v1,l[i][1].coef_[c]),axis=0)
v2 = np.concatenate((v2,l[j][1].coef_[c]),axis=0)
distance_matrix[i,j] = round(math.cos(angle_between(v1,v2)),2)
distance_matrix[j,i] = distance_matrix[i,j]

return distance_matrix


def avg_non_similar (dist,treshold) :
def avg_non_similar(dist,treshold):

nb_non_sim = 0
nb_sbgrps = np.size(dist,0)
for i in range (0, nb_sbgrps) :
for i in range(nb_sbgrps):
Comment on lines -124 to +121
Function avg_non_similar refactored with the following changes:

for j in range (i+1, nb_sbgrps) :
if dist[i,j] <= treshold :
nb_non_sim += 1
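
A closing note on similarity() above: taking math.cos(angle_between(v1, v2)) is simply the cosine similarity of the two concatenated coefficient vectors. A minimal equivalent sketch (an assumption about intent, not part of the diff):

    import numpy as np

    def cosine_similarity(v1, v2):
        # same normalisation as unit_vector() in the original code
        v1_u = v1 / np.linalg.norm(v1)
        v2_u = v2 / np.linalg.norm(v2)
        # cos(arccos(x)) == x on [-1, 1], so this matches
        # math.cos(angle_between(v1, v2)) before the rounding step
        return float(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
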