-
Notifications
You must be signed in to change notification settings - Fork 2
/
plots_pgbm.py
146 lines (113 loc) · 4.97 KB
/
plots_pgbm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import seaborn as sns
import scipy.stats as ss
import matplotlib.pyplot as plt
# Base directory (a relative path) that the plotting functions in this module
# prefix to the '/plots/...' locations where figures are saved.
MAIN_FOLDER = '../'
def plot_prediction(preds, preds25, preds975, ydata, title, state, train_size, doenca, label, path='quantile_lgbm', save=True):
    """
    Plot the observed series with the last-step prediction and its shaded band.

    For every prediction window ``n`` only the final value of the window
    (``preds[n][-1]``) is used, so the figure shows a single prediction line
    plus a filled region between the matching ``preds25`` and ``preds975``
    values.

    :param preds: 2-D array of predictions; ``preds.shape[1]`` is used as the
        prediction window length. Plotted with the legend label 'median'.
    :param preds25: array indexed like ``preds``; its last-step values form one
        edge of the shaded band (presumably the 2.5% quantile -- confirm).
    :param preds975: array indexed like ``preds``; its last-step values form
        the other edge of the shaded band (presumably the 97.5% quantile --
        confirm).
    :param ydata: observed series. Must support ``len``, iteration, slicing and
        ``.index`` (presumably a pandas Series -- confirm with the caller).
    :param title: not used (the title call is disabled).
    :param state: not used.
    :param train_size: position in ``ydata.index`` at which the 'Train/Test'
        vertical marker is drawn, or ``None`` to draw no marker. When it equals
        ``len(ydata)`` the marker is placed on the last index entry.
    :param doenca: inserted into the name of the saved image file.
    :param label: inserted into the name of the saved image file.
    :param path: not used.
    :param save: when True the figure is written to disk before being shown.
    :return: tuple ``(x, y, y25, y975)`` of lists holding the x positions and
        the plotted last-step values.
    """
    plt.figure()
    # NOTE(review): the first four observations are left out of the plotted
    # line while the min/max below use the whole series; the reason is not
    # visible in this file.
    plt.plot(ydata[4:], 'k-', label='data')

    # Vertical extent of the train/test marker.
    min_val = min([min(ydata), np.nanmin(preds)])
    max_val = max([max(ydata), np.nanmax(preds)])

    if train_size is not None:
        # A split located exactly at the end of the series has no index entry
        # of its own, so fall back to the last available one.
        split_pos = train_size - 1 if train_size == len(ydata) else train_size
        plt.vlines(ydata.index[split_pos], min_val, max_val, 'g', 'dashdot', lw=2, label='Train/Test')

    pred_window = preds.shape[1]
    steps = range(len(ydata.index) - pred_window)

    # Keep only the last predicted point of each window (single line plot).
    x = [ydata.index[n + pred_window] for n in steps]
    y = [preds[n][-1] for n in steps]
    y25 = [preds25[n][-1] for n in steps]
    y975 = [preds975[n][-1] for n in steps]

    plt.plot(x, y, 'r-', alpha=0.5, label='median')
    plt.fill_between(x, np.array(y25), np.array(y975), color='b', alpha=0.3)

    plt.grid()
    plt.ylabel('incidence')
    plt.xlabel('time')
    plt.xticks(rotation=30)
    plt.legend(loc=0)

    if save:
        # NOTE(review): the other plotting functions in this module save under
        # 'plots/qlgbm/'; this one writes 'plots/qlgbm_qlgbm_...'. Left
        # unchanged because existing consumers may rely on the file name.
        plt.savefig(f'{MAIN_FOLDER}/plots/qlgbm_qlgbm_{doenca}_{label}_ss.png',bbox_inches='tight', dpi=300)
    plt.show()

    return x, y, y25, y975
def plot_transf_prediction(pred_window, preds_t, preds, ydata, title, state, train_size, doenca, path='quantile_lgbm', save=True):
    """
    Plot the observed series together with two prediction curves.

    Both curves are drawn against the index entries of ``ydata`` starting
    ``pred_window`` positions in, i.e. ``ydata.index[n + pred_window]`` for
    ``n`` in ``range(len(ydata.index) - pred_window)``.

    :param pred_window: offset (in positions) between the start of the series
        and the first plotted prediction.
    :param preds_t: values plotted in green with the legend label
        'median - transf model'; must have one value per x position.
    :param preds: values plotted in red with the legend label
        'median - chik model'; must have one value per x position.
    :param ydata: observed series. Must support ``len``, iteration and
        ``.index`` (presumably a pandas Series -- confirm with the caller).
    :param title: inserted into the name of the saved image file.
    :param state: not used.
    :param train_size: position in ``ydata.index`` at which the 'Train/Test'
        vertical marker is drawn, or ``None`` to draw no marker. When it equals
        ``len(ydata)`` the marker is placed on the last index entry.
    :param doenca: inserted into the name of the saved image file.
    :param path: not used.
    :param save: when True the figure is written to disk before being shown.
    :return: None
    """
    plt.clf()
    plt.plot(ydata, 'k-', label='data')

    # Vertical extent of the train/test marker.
    min_val = min([min(ydata), np.nanmin(preds), np.nanmin(preds_t)])
    max_val = max([max(ydata), np.nanmax(preds), np.nanmax(preds_t)])

    if train_size is not None:
        # Same convention as plot_prediction: a split located exactly at the
        # end of the series is drawn on the last available index entry.
        split_pos = train_size - 1 if train_size == len(ydata) else train_size
        plt.vlines(ydata.index[split_pos], min_val, max_val, 'g', 'dashdot', lw=2, label='Train/Test')

    # x positions of the predictions.
    x = [ydata.index[n + pred_window] for n in range(len(ydata.index) - pred_window)]

    plt.plot(x, preds_t, 'g-', alpha=0.5, label='median - transf model')
    plt.plot(x, preds, 'r-', alpha=0.5, label='median - chik model')

    plt.grid()
    plt.ylabel('Weekly cases')
    plt.xticks(rotation=0)
    plt.legend(loc=0)

    if save:
        plt.savefig(f'{MAIN_FOLDER}/plots/qlgbm/qlgbm_{doenca}_{title}_ss.png', dpi=300)
    plt.show()

    return None
def predicted_vs_observed(predicted, real, city, state, doenca, model_name, city_name, plot=True):
    """
    Compute percentile ranks of predicted and observed values and plot them.

    ``predicted`` and ``real`` are pooled into one sample with ``np.hstack``;
    every element of each input is then scored against that pooled sample with
    ``scipy.stats.percentileofscore``.

    :param predicted: predicted values (expected to be 1-D so that each element
        is a scalar score -- TODO confirm against callers)
    :param real: observed values matching the predictions
    :param city: forwarded to ``plot_cross_qq`` (used in the saved file name)
    :param state: not used
    :param doenca: forwarded to ``plot_cross_qq`` (used in the saved file name)
    :param model_name: forwarded to ``plot_cross_qq`` (used in the saved file
        name)
    :param city_name: forwarded to ``plot_cross_qq``
    :param plot: generates and saves the qqplot when True (default); when False
        only the percentiles are computed
    :return: tuple ``(q_o, q_p)`` of numpy arrays with the percentiles of the
        observed and of the predicted values, in that order
    """
    obs_preds = np.hstack((predicted, real))
    q_p = [ss.percentileofscore(obs_preds, x) for x in predicted]
    q_o = [ss.percentileofscore(obs_preds, x) for x in real]

    # Honour the documented flag: callers that only need the numbers can skip
    # the figure (and the file it writes).
    if plot:
        plot_cross_qq(city, doenca, q_o, q_p, model_name, city_name)

    return np.array(q_o), np.array(q_p)
def plot_cross_qq(city, doenca, q_o, q_p,model_name, city_name):
    """
    Draw and save a 2-D density plot of observed vs. predicted percentiles.

    The plot is drawn on the current matplotlib figure, with a black diagonal
    from (0, 0) to (100, 100) as reference, and saved as a PNG under
    ``MAIN_FOLDER``.

    :param city: inserted into the name of the saved image file
    :param doenca: inserted into the name of the saved image file
    :param q_o: percentiles of the observed values (x axis)
    :param q_p: percentiles of the predicted values (y axis)
    :param model_name: inserted into the name of the saved image file
    :param city_name: not used (the title call is disabled)
    :return: None
    """
    # Align the two vectors by keeping only the last len(q_p) observed values.
    # The previous slice start, len(q_p) - len(q_o), only produced matching
    # lengths when both inputs already had the same size.
    start = max(len(q_o) - len(q_p), 0)
    observed = q_o[start:]

    # Keyword x/y and fill= are required by current seaborn releases, which no
    # longer accept a second positional vector or the shade= keyword.
    ax = sns.kdeplot(x=observed, y=q_p, fill=True)
    ax.set_xlabel('observed')
    ax.set_ylabel('predicted')
    ax.set_xlim([0, 100])
    ax.set_ylim([0, 100])
    plt.plot([0, 100], [0, 100], 'k')
    plt.savefig(f'{MAIN_FOLDER}/plots/qlgbm/qlgbm_cross_qqbplot_{model_name}_{doenca}_{city}.png', dpi=300)