-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
33 lines (26 loc) · 987 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from preprocessing import *
# Second argument to generate_Ms() and the denominator of the Ms-based
# similarity computed in jaccard(). Presumably the number of hash functions
# (i.e. rows of Ms) -- TODO confirm against preprocessing.generate_Ms.
NUMBER_OF_FUNCTIONS = 100
# Values of q that jaccard() iterates over; each one is passed to generate_M()
# and generate_Ms(). The trailing comment holds a wider sweep that is
# currently disabled.
Q = [2] #[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
def jaccard():
    """Print how similar column 0 is to every other column, for each q in Q.

    For every ``q`` the matrices ``M = generate_M(q)`` and
    ``Ms = generate_Ms(q, NUMBER_OF_FUNCTIONS, M)`` are built, and column 0 is
    compared against each column ``j >= 1`` in two ways:

    * using M:  (rows where both columns equal 1) / (rows where either
      column equals 1), i.e. the Jaccard similarity of the two columns
      viewed as sets of row indices;
    * using Ms: the number of rows on which the two Ms columns hold the same
      value, divided by ``NUMBER_OF_FUNCTIONS``.

    Takes no arguments and returns ``None``; one line per compared column is
    written to standard output. The ``D_1`` / ``D_<j+1>`` labels in that
    output suggest the columns are documents -- confirm in ``preprocessing``.

    Raises:
        ZeroDivisionError: if neither column 0 nor column j of M contains a 1
            (the union is empty). This matches the historical behaviour.
    """
    for q in Q:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("")
        print(" ".join(["q = ", str(q)]))

        M = generate_M(q)
        Ms = generate_Ms(q, NUMBER_OF_FUNCTIONS, M)

        # Column 0 is the fixed reference; compute its views once per q.
        first_is_one = M[:, 0] == 1
        first_ms_column = Ms[:, 0]

        for j in range(1, M.shape[1]):
            # Exact Jaccard similarity from M.
            other_is_one = M[:, j] == 1
            intersection_size = np.count_nonzero(first_is_one & other_is_one)
            union_size = np.count_nonzero(first_is_one | other_is_one)
            jaccard_M = intersection_size * 1.0 / union_size

            # Estimate from Ms: fraction of rows on which both columns agree.
            # float(...) keeps this a true division under Python 2 as well.
            # NOTE(review): the denominator is NUMBER_OF_FUNCTIONS rather than
            # Ms.shape[0]; they are presumably equal -- confirm.
            agreeing_rows = np.count_nonzero(first_ms_column == Ms[:, j])
            jaccard_Ms = float(agreeing_rows) / NUMBER_OF_FUNCTIONS

            label = "Similarity between D_1 and D_" + str(j+1) + ": is "
            print(" ".join([
                label,
                str(jaccard_M),
                " using M and ",
                str(jaccard_Ms),
                " using Ms",
            ]))
# Run the comparison only when this file is executed as a script, so that
# importing the module does not print anything.
if __name__ == "__main__":
    jaccard()