-
Notifications
You must be signed in to change notification settings - Fork 0
/
find_top_n.py
executable file
·88 lines (74 loc) · 3.18 KB
/
find_top_n.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
import argparse
import heapq
import json
import natsort
import os
import pandas
from pandas.io.json import json_normalize
def main():
parser = argparse.ArgumentParser("Find top n measurements of every metric ")
parser.add_argument('--path', '-p', required=True, help='Path where to find all measurements')
parser.add_argument('--model', '-m', required=True, help='Name of the model for all measurements')
parser.add_argument('--n', '-n', required=True, help='Number of top elements to show')
args = parser.parse_args()
find_top_n(args.path, args.model, args.n)
def find_top_n(path, model, n):
n = int(n)
folderlist = [f for f in os.listdir(path) if not os.path.isfile(os.path.join(path, f))]
datadf = pandas.DataFrame()
print('Reading data')
for folder in folderlist:
# Get the data itself
with open (os.path.join(path, folder, model+"_metrics.json")) as fmetric:
jdata = json.load(fmetric)
ndata = json_normalize(jdata)
ndata = ndata.filter([
'precision.mean', 'precision.sd',
'recall.mean', 'recall.sd',
'f1_score.mean', 'f1_score.sd',
'auc.mean', 'auc.sd',
'training_time_mean'
])
# as well as the call params
try:
with open(os.path.join(path, folder, 'call_parameters.txt')) as cparms:
call_vals = None
for line in cparms:
try:
call_vals = line.split('[')[1].split(']')[0].replace("'", "")
call_vals = call_vals.split(', ')
call_df = pandas.DataFrame(data=[call_vals])
call_df['folder_name'] = folder
ndata = pandas.concat([ndata, call_df], axis=1)
datadf = datadf.append(ndata, ignore_index=True, sort=False)
except IndexError:
pass # TODO: Find a more efficient alternative
except FileNotFoundError:
pass
print("Top {} by F1 score:".format(n))
_print_as_ltx_table(datadf.nlargest(n, 'f1_score.mean'))
print("Top {} by Precision:".format(n))
_print_as_ltx_table(datadf.nlargest(n, 'precision.mean'))
print("Top {} by Recall:".format(n))
_print_as_ltx_table(datadf.nlargest(n, 'recall.mean'))
print("Top {} by AUC:".format(n))
_print_as_ltx_table(datadf.nlargest(n, 'auc.mean'))
def _print_as_ltx_table(pandasDF):
print("Layout: row 0 & row 1 & row 2 & row3 & Prec & Recall & F1 pm F1.sd & auc & runtime_mean & folder_name")
for index, row in pandasDF.iterrows():
print("{} & {} & {} & {} & {:4.2f} & {:4.2f} & {:4.2f} $\pm$ {:4.2f} & {:4.2f} & {} & {}\\\\".format(
row[0],
row[1],
row[2],
row[3],
float(row['precision.mean'])*100,
float(row['recall.mean'])*100,
float(row['f1_score.mean'])*100,
float(row['f1_score.sd'])*100,
float(row['auc.mean'])*100,
row['training_time_mean'],
row['folder_name'],
))
if __name__ == '__main__':
main()