-
Notifications
You must be signed in to change notification settings - Fork 24
/
post_clustering.py
87 lines (75 loc) · 2.58 KB
/
post_clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import numpy as np
from sklearn import cluster
from scipy.sparse.linalg import svds
from sklearn.preprocessing import normalize
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score, adjusted_mutual_info_score
# Short aliases for the standard external clustering metrics from sklearn,
# so callers can write nmi(y, y_pred) etc.
nmi = normalized_mutual_info_score
ami = adjusted_mutual_info_score
ari = adjusted_rand_score
def acc(y_true, y_pred):
    """
    Calculate clustering accuracy via optimal label matching.

    Builds the confusion matrix between predicted and true labels, then
    solves a linear assignment problem (Hungarian algorithm) to find the
    permutation of cluster labels that maximizes agreement.

    # Arguments
        y_true: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    from scipy.optimize import linear_sum_assignment

    # Cast BOTH label arrays to int64: the original cast only y_true, so
    # float-typed predictions would break the integer indexing below.
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    # Vectorized confusion-matrix accumulation (replaces the per-sample
    # Python loop); w[p, t] counts samples predicted p with true label t.
    np.add.at(w, (y_pred, y_true), 1)
    # Assignment minimizes cost, so negate counts to maximize agreement.
    ind_row, ind_col = linear_sum_assignment(w.max() - w)
    return w[ind_row, ind_col].sum() / y_pred.size
def err_rate(gt_s, s):
    """Return the clustering error rate: 1 minus the optimally-matched accuracy.

    # Arguments
        gt_s: ground-truth labels, numpy.array with shape `(n_samples,)`
        s: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        error rate, in [0,1]
    """
    accuracy = acc(gt_s, s)
    return 1.0 - accuracy
def thrC(C, alpha):
    """
    Threshold the coefficient matrix column-wise, keeping dominant entries.

    For each column, keeps the largest-magnitude entries up to (and
    including) the first position where the cumulative absolute sum
    strictly exceeds `alpha` times the column's total absolute sum;
    all other entries are zeroed.  For `alpha >= 1` the matrix is
    returned unchanged.

    Fixes a crash in the original implementation: an all-zero column made
    the accumulation loop run past the end of the array (IndexError,
    since the strict `>` test could never fire) — here such a column is
    simply left all-zero.

    # Arguments
        C: coefficient matrix, numpy.array, assumed square `(N, N)`
        alpha: fraction of each column's L1 mass to retain, in (0, 1]
    # Return
        thresholded matrix `(N, N)`, or C itself if alpha >= 1
    """
    if alpha >= 1:
        return C
    N = C.shape[1]
    Cp = np.zeros((N, N))
    absC = np.abs(C)
    # Per-column indices sorted by descending magnitude, and the
    # corresponding sorted magnitudes with their running sums.
    order = np.argsort(-absC, axis=0)
    S = np.take_along_axis(absC, order, axis=0)
    csum = np.cumsum(S, axis=0)
    for i in range(N):
        total = csum[-1, i]
        # First index where the running sum strictly exceeds alpha*total
        # (matches the original strict `>` stopping rule).  For an
        # all-zero column t == N and the full zero column is copied.
        t = np.searchsorted(csum[:, i], alpha * total, side='right')
        keep = order[:t + 1, i]
        Cp[keep, i] = C[keep, i]
    return Cp
def post_proC(C, K, d, ro):
    """
    Post-process a self-expressive coefficient matrix into cluster labels.

    Builds a low-rank spectral embedding of the (symmetrized) coefficient
    matrix via truncated SVD, turns it into a non-negative affinity
    matrix, and runs spectral clustering on it.

    # Arguments
        C: coefficient matrix, numpy.array with shape `(n, n)`
        K: number of clusters
        d: assumed dimension of each subspace (d*K+1 singular vectors kept)
        ro: exponent applied to the affinity entries to sharpen contrast
    # Return
        (grp, L): predicted labels `(n,)` and the affinity matrix `(n, n)`
    """
    n = C.shape[0]
    C = 0.5 * (C + C.T)  # symmetrize
    # C = C - np.diag(np.diag(C)) + np.eye(n, n)  # good for coil20, bad for orl
    r = d * K + 1  # number of singular vectors to keep
    U, S, _ = svds(C, r, v0=np.ones(n))
    # svds returns singular values in ascending order; flip to descending.
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)  # scale each embedding dimension by sqrt(singular value)
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)  # keep only non-negative similarities
    L = np.abs(Z ** ro)
    L = L / L.max()  # rescale to [0, 1]
    L = 0.5 * (L + L.T)  # re-symmetrize against numerical asymmetry
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize')
    # fit_predict both fits and predicts; the original called fit() first,
    # running the entire spectral decomposition twice for nothing.
    grp = spectral.fit_predict(L)
    return grp, L
def spectral_clustering(C, K, d, alpha, ro):
    """Run the full pipeline: threshold C, then spectrally cluster it.

    # Arguments
        C: coefficient matrix, numpy.array with shape `(n, n)`
        K: number of clusters
        d: assumed dimension of each subspace
        alpha: column-wise thresholding fraction passed to thrC
        ro: affinity exponent passed to post_proC
    # Return
        predicted cluster labels, numpy.array with shape `(n,)`
    """
    thresholded = thrC(C, alpha)
    labels, _affinity = post_proC(thresholded, K, d, ro)
    return labels