-
Notifications
You must be signed in to change notification settings - Fork 21
/
k_mediods.py
53 lines (45 loc) · 1.57 KB
/
k_mediods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
class KMediods:
    '''
    K-medoids clustering using alternating assignment / medoid-update steps.

    Cluster centers (medoids) are always actual rows of the training data.
    The distance function is supplied by the caller at fit time and reused
    by predict.
    '''

    def fit(self, X, n_clusters, distance):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        n_clusters : The number of clusters
        distance : Distance algorithm, see also distance.py
            Called as ``distance(x, X)`` with one sample ``x`` and the whole
            data matrix ``X``; it is expected to return a 1-D array holding
            the distance from ``x`` to every row of ``X``.

        Returns
        -------
        y : shape (n_samples,)
            Predicted cluster label per sample.

        Raises
        ------
        ValueError
            If n_clusters is not between 1 and n_samples (inclusive).
        '''
        X = np.asarray(X)
        n_samples = X.shape[0]
        if not 1 <= n_clusters <= n_samples:
            raise ValueError(
                'n_clusters must be between 1 and n_samples (%d), got %r'
                % (n_samples, n_clusters)
            )

        self.__distance = distance
        # Full pairwise matrix: row i is distance(X[i], X).
        distances = np.apply_along_axis(self.__distance, 1, X, X)

        # Sample WITHOUT replacement: a duplicated initial medoid can never
        # win the argmin below, which would leave its cluster empty.
        centers = np.random.choice(n_samples, n_clusters, replace=False)

        while True:
            # Assignment step: each sample goes to its nearest medoid
            # (ties resolve to the lowest cluster index).
            y = np.argmin(distances[centers], axis=0)

            # Update step: the new medoid of a cluster is the member with
            # the smallest total distance to the other members.
            centers_tmp = centers.copy()
            for i in range(n_clusters):
                indexes = np.flatnonzero(y == i)
                if indexes.size == 0:
                    # Possible when the data contains duplicate points;
                    # keep the previous medoid rather than failing on an
                    # argmin over an empty array.
                    continue
                errors = np.sum(distances[np.ix_(indexes, indexes)], axis=0)
                centers_tmp[i] = indexes[np.argmin(errors)]

            # Converged once the medoids stop moving; y already matches them.
            if np.array_equal(centers, centers_tmp):
                break
            centers = centers_tmp

        self.__centers = X[centers]
        return y

    def predict(self, X):
        '''
        Assign each sample to the nearest medoid found by fit.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Predicting data

        Returns
        -------
        y : shape (n_samples,)
            Predicted cluster label per sample.
        '''
        X = np.asarray(X)
        # One row per medoid from distance(center, X); transpose so that
        # rows are samples and columns are clusters.
        distances = np.apply_along_axis(self.__distance, 1, self.__centers, X).T
        return np.argmin(distances, axis=1)