-
Notifications
You must be signed in to change notification settings - Fork 0
/
Analysis.py
129 lines (106 loc) · 4.67 KB
/
Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import numpy as np
import csv
from ast import literal_eval
from Parameters import *
import Parameters
def simplest_type(s):
    """Convert a CSV cell to the simplest Python literal it represents.

    Args:
        s: Raw cell text, e.g. ``"3"``, ``"0.25"`` or ``"some name"``.

    Returns:
        The value produced by ``ast.literal_eval`` when ``s`` is a valid
        Python literal; otherwise ``s`` itself, unchanged.
    """
    try:
        return literal_eval(s)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors literal_eval documents for malformed input.
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return s
def read_data(path):
    """Read a CSV file into a header row and a list of typed data rows.

    Every cell (header cells included) is passed through ``simplest_type``,
    so numeric-looking text becomes numbers and everything else stays a
    string. Whitespace directly after a delimiter is skipped.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``(header, data)`` tuple: ``header`` is the first row of the file
        and ``data`` is the list of all remaining rows.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the file contains no rows at all.
    """
    # newline="" hands line-ending handling to the csv module, which is what
    # its documentation requires for quoted fields containing line breaks.
    with open(path, "r", newline="") as f:
        reader = csv.reader(f, skipinitialspace=True)
        data = [list(map(simplest_type, row)) for row in reader]
    header = data.pop(0)
    return header, data
def total_avg_running_time(data):
    """Print the mean running time of both methods over all experiments.

    Args:
        data: Mapping from scenario name to a list of experiment rows.
            Column 7 of each row is read as the modularity running time and
            column 8 as the map equation running time.

    Prints one line per method with the mean rounded to two decimals and an
    "s" suffix. When ``data`` contains no experiment rows a single notice is
    printed instead, since no mean can be computed.
    """
    experiments = [row for rows in data.values() for row in rows]
    if not experiments:
        print("No experiment data available to average running times.")
        return

    count = len(experiments)
    total_mod = sum(row[7] for row in experiments)
    total_map = sum(row[8] for row in experiments)
    print("Total average modularity running time:", str(round(total_mod / count, 2)), "s")
    print("Total average map equation running time:", str(round(total_map / count, 2)), "s")
def total_avg_community_count_difference(data):
    """Print the mean community-count error of both methods over all experiments.

    Args:
        data: Mapping from scenario name to a list of experiment rows.
            Column 2 is used as the reference community count, column 3 as
            the count found by modularity and column 4 as the count found by
            the map equation.

    The error of a row is ``found - reference`` and is kept signed, so
    over- and under-estimates cancel each other in the mean. Each mean is
    rounded to two decimals. When ``data`` contains no experiment rows a
    single notice is printed instead, since no mean can be computed.
    """
    experiments = [row for rows in data.values() for row in rows]
    if not experiments:
        print("No experiment data available to average community count errors.")
        return

    count = len(experiments)
    total_mod = sum(row[3] - row[2] for row in experiments)
    total_map = sum(row[4] - row[2] for row in experiments)
    print("Total average error finding the right amount of communities by modularity:", str(round(total_mod / count, 2)))
    print("Total average error finding the right amount of communities by the map equation:", str(round(total_map / count, 2)))
def total_avg_nmi_difference(data):
    """Print the mean NMI score of each method over all experiments.

    Despite the function name, no difference is computed: the two means are
    printed side by side so they can be compared.

    Args:
        data: Mapping from scenario name to a list of experiment rows.
            Column 5 is read as the NMI score of modularity and column 6 as
            the NMI score of the map equation.

    Each mean is rounded to two decimals. When ``data`` contains no
    experiment rows a single notice is printed instead, since no mean can be
    computed.
    """
    experiments = [row for rows in data.values() for row in rows]
    if not experiments:
        print("No experiment data available to average NMI scores.")
        return

    count = len(experiments)
    total_mod = sum(row[5] for row in experiments)
    total_map = sum(row[6] for row in experiments)
    print("Total average NMI score of modularity:", str(round(total_mod / count, 2)))
    print("Total average NMI score of the map equation:", str(round(total_map / count, 2)))
def nmi_varying_vs_not(data):
    """Print the mean NMI gap, split by whether community sizes vary.

    The gap of a row is column 6 minus column 5, i.e. the map equation NMI
    score minus the modularity NMI score. A scenario belongs to the
    "varying" group when its name ends with ``"varying"``; every other
    scenario belongs to the "not varying" group.

    Args:
        data: Mapping from scenario name to a list of experiment rows.

    Each mean is rounded to two decimals. A group without any experiment
    rows is reported as ``n/a`` because its mean is undefined.
    """
    def mean_text(gaps):
        # An empty group has no mean; report that instead of dividing by 0.
        if not gaps:
            return "n/a"
        return str(round(sum(gaps) / len(gaps), 2))

    varying_gaps = []
    other_gaps = []
    for scenario, rows in data.items():
        group = varying_gaps if scenario.endswith("varying") else other_gaps
        group.extend(row[6] - row[5] for row in rows)

    print("Total average difference in NMI scores between the map equation and modularity when community sizes vary:", mean_text(varying_gaps))
    print("Total average difference in NMI scores between the map equation and modularity when community sizes do not vary:", mean_text(other_gaps))
def nmi_interconnected_vs_not(data):
    """Print the mean NMI score per method, split by interconnectedness.

    A scenario counts as highly interconnected when its name ends with
    ``"interconnected"`` (this already covers ``"random_interconnected"``);
    every other scenario falls into the "other" group. Column 5 of a row is
    read as the modularity NMI score and column 6 as the map equation NMI
    score.

    Args:
        data: Mapping from scenario name to a list of experiment rows.

    Each mean is rounded to two decimals. A group without any experiment
    rows is reported as ``n/a`` because its mean is undefined.
    """
    def mean_text(total, count):
        # An empty group has no mean; report that instead of dividing by 0.
        if count == 0:
            return "n/a"
        return str(round(total / count, 2))

    # Per group: [modularity total, map equation total, number of rows].
    interconnected = [0, 0, 0]
    other = [0, 0, 0]
    for scenario, rows in data.items():
        group = interconnected if scenario.endswith("interconnected") else other
        for row in rows:
            group[0] += row[5]
            group[1] += row[6]
            group[2] += 1

    print("Total average NMI score for modularity in highly interconnected scenarios:", mean_text(interconnected[0], interconnected[2]))
    print("Total average NMI score for the map equation in highly interconnected scenarios:", mean_text(interconnected[1], interconnected[2]))
    print("Total average NMI score for modularity in other scenarios:", mean_text(other[0], other[2]))
    print("Total average NMI score for the map equation in other scenarios:", mean_text(other[1], other[2]))
# Load the result file of every scenario listed in Parameters.params, then
# print all summary statistics over the scenarios that could be loaded.
data = {}
for name in Parameters.params:
    try:
        (headers, fileData) = read_data(os.path.join("Results", name + ".csv"))
    except FileNotFoundError:
        print("Error: '" + name + ".csv' is missing")
        # Skip this scenario; otherwise the rows of the previously loaded
        # file (or an undefined name) would be stored under this scenario.
        continue
    except Exception as error:
        # Best effort: report any other read failure and carry on with the
        # remaining scenarios.
        print("Error: '" + name + ".csv' could not be read:", error)
        continue
    data[name] = fileData

total_avg_running_time(data)
total_avg_community_count_difference(data)
total_avg_nmi_difference(data)
nmi_varying_vs_not(data)
nmi_interconnected_vs_not(data)