-
Notifications
You must be signed in to change notification settings - Fork 1
/
cluster2.py
66 lines (47 loc) · 1.95 KB
/
cluster2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
class SubspaceCluster:
    """A single subspace cluster: a set of objects plus a set of dimensions.

    Attributes:
        objs: sorted array of the distinct object ids (via ``np.unique``).
        dims: sorted array of the distinct dimension ids (via ``np.unique``).
        info: dict holding any extra keyword arguments given at construction.
    """

    def __init__(self, objs, dims, **kwargs):
        """Store de-duplicated objects/dimensions and any extra metadata.

        Args:
            objs: iterable of object ids; duplicates are dropped.
            dims: iterable of dimension ids; duplicates are dropped.
            **kwargs: arbitrary metadata, kept verbatim in ``self.info``.
        """
        self.info = kwargs
        self.dims = np.unique(dims)
        self.objs = np.unique(objs)

    @staticmethod
    def cardinal_interesting(clust, alpha = 1, beta = 1):
        """Score a cluster from the number of its objects and dimensions.

        Computes ``alpha * log(#objects) + beta * log(#dimensions)``.

        Args:
            clust: object exposing sized ``objs`` and ``dims`` attributes.
            alpha: weight of the object-count term.
            beta: weight of the dimension-count term.

        Returns:
            The weighted sum of the two natural logarithms.
        """
        n_objs = len(clust.objs)
        n_dims = len(clust.dims)
        obj_term = alpha * np.log(n_objs)
        dim_term = beta * np.log(n_dims)
        return obj_term + dim_term
class SubspaceClustering:
    """An ordered collection of clusters that together form one clustering.

    Attributes:
        clusters: list holding the member clusters in insertion order.
    """

    def __init__(self, clusters=None):
        """Create a clustering.

        Args:
            clusters: optional list of clusters to start from. The list is
                stored as-is (not copied). When omitted, a new empty list is
                created for this instance.
        """
        # A literal ``[]`` default would be created once and shared by every
        # instance built without arguments, so ``add`` on one clustering
        # would leak into all the others. Use None as the sentinel instead.
        if clusters is None:
            clusters = []
        self.clusters = clusters

    def size(self):
        """Return the number of clusters in this clustering."""
        return len(self.clusters)

    def add(self, clust):
        """Append ``clust`` to this clustering."""
        self.clusters.append(clust)

    def __iter__(self):
        """Iterate over the member clusters in insertion order."""
        return iter(self.clusters)
############# SOME HELPER CLASSES & METHODS ################
class ClusteringReader:
    """Load subspace clusterings from a CSV file.

    The file is parsed with ``csv.DictReader``; ``load`` reads the columns
    ``algorithm``, ``run``, ``objects`` and ``dimensions`` of every row.
    Rows are grouped into clusterings keyed by ``'<algorithm>-<run>'``.

    Attributes:
        has_header: flag given at construction; stored but not consulted
            by ``load``.
        path: path of the CSV file to read.
        clts: dict mapping clustering id to ``SubspaceClustering``; it is
            cleared and refilled in place by every ``load`` call.
    """

    def __init__(self, fname, header=True):
        """Remember the file path; nothing is read until ``load``."""
        self.has_header = header
        self.path = fname
        self.clts = {}

    def set_path(self, fname):
        """Point the reader at a different file."""
        self.path = fname

    @staticmethod
    def _open_csv(path):
        """Open ``path`` in the mode the running interpreter's csv needs."""
        import sys
        if sys.version_info[0] >= 3:
            # Python 3's csv module consumes text; newline='' lets it do its
            # own newline handling, as the csv documentation recommends.
            return open(path, 'r', newline='')
        # Python 2's csv module consumes bytes.
        return open(path, 'rb')

    def load(self):
        """Parse the file at ``self.path`` and return the clusterings.

        Per row, ``dimensions`` is a comma-separated list of ints shared by
        all clusters of the row, and ``objects`` holds one or more clusters
        separated by ``';'``, each a comma-separated list of ints.

        Returns:
            ``self.clts``. It is empty when ``self.path`` is not an
            existing file.

        Raises:
            KeyError: if a required column is missing.
            ValueError: if an object or dimension entry is not an integer.
        """
        import csv
        import os

        self.clts.clear()
        if not os.path.isfile(self.path):
            return self.clts

        with self._open_csv(self.path) as fi:
            for row in csv.DictReader(fi):
                clustering_id = '%s-%s' % (row['algorithm'], row['run'])
                if clustering_id not in self.clts:
                    # Pass a fresh list explicitly so that clusterings never
                    # end up sharing one cluster list.
                    self.clts[clustering_id] = SubspaceClustering([])
                clustering = self.clts[clustering_id]

                dimensions = [int(dim) for dim in row['dimensions'].split(',')]
                for group in row['objects'].split(';'):
                    objects = [int(obj) for obj in group.split(',')]
                    clustering.add(SubspaceCluster(objects, dimensions))
        return self.clts