-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathcompile_best_experiments.py
More file actions
69 lines (53 loc) · 2.27 KB
/
compile_best_experiments.py
File metadata and controls
69 lines (53 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
''' compiles the best model for each state into a single experiment result file.
'''
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import json
import pandas as pd
from lib import config
from lib.analysis import scoring
# input file name
TAG = 'ath'
best_models = {}
for state in config.STATES:
try:
print state
ath_models = pd.read_csv(config.STATES_DIR + '/{0}/{1}_preds.csv'.format(state, TAG))
ath_ranks = pd.read_csv(config.STATES_DIR + '/{0}/{1}_table.csv'.format(state, TAG))
### isolate best performing model, excluding GFT or AR52 benchmarks ###
# count total number of models in file
assert ath_ranks.SCORE.values[0] == 0, "First row should be a metric with score of 0"
n_models = 0
while True:
if ath_ranks.SCORE.values[n_models + 1] == 0:
break
n_models += 1
# select Pearson correlation scores, excluding benchmarks
tmp_models = ath_ranks.iloc[1:n_models + 1]
tmp_models = tmp_models[~tmp_models.Metric.isin(['GFT','AR52'])]
# select top model
ath_top_model = tmp_models.loc[tmp_models.SCORE.idxmax()].Metric
ath_predictions = ath_models[ath_top_model]
# record best model
best_models[state] = ath_top_model
# compile new prediction file
week = ath_models['Week']
target = ath_models['ILI']
gft = ath_models['GFT']
ar = ath_models['AR52']
final = pd.concat([week, target, gft, ar, ath_predictions], axis=1)
final.columns = ['Week','ILI','GFT','AR52','ARGO']
# save to file
outfile = config.STATES_DIR + '/{0}/top_argo_preds.csv'.format(state)
outtable = config.STATES_DIR + '/{0}/top_argo_table.csv'.format(state)
final.to_csv(outfile, index=False)
# regenerate score table
A = scoring.Scorer(state, show_terminal=True, gft_window=True)
A.results_load(outfile)
A.results_score()
A.results_save(outtable)
except Exception as e:
print e
with open(config.STATES_DIR + '/_overview/best_model_dict.txt', 'w') as f:
json.dump(best_models, f, indent=4)