-
Notifications
You must be signed in to change notification settings - Fork 0
/
export_latex.py
82 lines (63 loc) · 2.5 KB
/
export_latex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import math
from pathlib import Path
if __name__ == '__main__':
basepath = Path('results', 'reports')
csv_path = basepath / 'ari_scores.csv'
dadc_path = Path('results', 'reports', 'ari_DADC.csv')
df = pd.read_csv(csv_path) \
.set_index('dataset')
dadc_rw_df = pd.read_csv(dadc_rw_path) \
.set_index('dataset')
dadc_syn_df = pd.read_csv(dadc_syn_path) \
.set_index('dataset')
dadc_df = pd.concat([dadc_rw_df, dadc_syn_df], axis=0)
dadc_df = dadc_df.rename({'ari': 'DADC'}, axis=1)
dadc_df.index = dadc_df.index + '.arff'
df = df.merge(dadc_df, left_index=True, right_index=True, how='outer')
mask = df.index.isin(to_keep)
df = df.loc[mask]
def add_type(row):
t = 'R' if row.name in to_keep_rw else 'S'
row['Type'] = t
return row
df = df.apply(add_type, axis=1) \
.sort_values(['Type', 'dataset'], ascending=[False, True])
col_order = ['BridgeClustering', 'DBSCAN', 'HDBSCAN', 'BorderPeelingWrapper', 'DenMuneWrapper', 'AUTOCLUST', 'OPTICS', 'DADC']
renamer = {
'BridgeClustering': 'BAC',
'BorderPeelingWrapper': 'BP',
'DenMuneWrapper': 'DenMune'
}
df = df[col_order].rename(renamer, axis=1)
def formatter(row):
maxval = None
for r in row.index:
if r == 'Type':
continue
maxval = max(maxval, row[r]) if not maxval is None else row[r]
for r in row.index:
if r == 'Type':
continue
val = row[r]
if val == maxval:
row[r] = f'\\textbf{{{val:.3f}}}'
elif math.isnan(val):
row[r] = 'N/A'
else:
row[r] = f'{val:.3f}'
return row
mean_all, median_all = df.mean(), df.median()
mask = ~df.isna().any(axis=1)
mean_nan, median_nan = df.loc[mask].mean(), df.loc[mask].median()
mean_all.name = 'Mean (All)'
median_all.name = 'Median (All)'
mean_nan.name = f'Mean ({mask.sum()} datasets)'
median_nan.name = f'Median ({mask.sum()} datasets)'
df.index = df.index.str[:-5]
df.loc[f'Mean ({mask.sum()} datasets)'] = mean_nan
df.loc[f'Median ({mask.sum()} datasets)'] = median_nan
df.loc['Mean (All)'] = mean_all
df.loc['Median (All)'] = median_all
df = df.apply(formatter, axis=1)
df.to_latex(basepath / 'table.tex', escape=False)