-
Notifications
You must be signed in to change notification settings - Fork 0
/
logic.py
129 lines (106 loc) · 4.86 KB
/
logic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import pandas as pd
import os
import datetime
import pickle
from sklearn.ensemble import RandomForestRegressor
#############
### Get Avaliable Models Index
files = np.array(os.listdir("dash/"))
files = [file for file in files if '.sav' in file]
models = {'station':[x[0:4] for x in files],
'variable':[x[5:9] for x in files],
'model':files}
models = pd.DataFrame(models).sort_values(by='station').reset_index(drop=True)
###############
### Utils For Models Prediction
def prepare_pred_data(df,station,variable):
print('Preparing Data For {} {} Prediction'.format(station,variable),end="\n\n")
df_air = df[df['station']==station].sort_values(by=['day_hour'],ascending=True).reset_index(drop=True).drop_duplicates(subset='day_hour')
## Save date_hour
print("Preparing Model Inputs")
date = df_air[['day_hour']]
## Select only numeric variables
if variable=='vsby':
numeric_cols = ['tmpf','dwpf','relh','drct','sknt','alti','skyl1']
df_num = df_air[numeric_cols]
else:
numeric_cols = ['tmpf','dwpf','relh','drct','sknt','alti','vsby']
df_num = df_air[numeric_cols]
print('Preparing Data For {} {} Prediction'.format(station,variable),end="\n\n")
df_air = df[df['station']==station].sort_values(by=['day_hour'],ascending=True).reset_index(drop=True).drop_duplicates(subset='day_hour')
## Save date_hour
print("Preparing Model Inputs")
date = df_air[['day_hour']]
## Select only numeric variables
if variable=='vsby':
numeric_cols = ['tmpf','dwpf','relh','drct','sknt','alti','skyl1']
df_num = df_air[numeric_cols]
else:
numeric_cols = ['tmpf','dwpf','relh','drct','sknt','alti','vsby']
df_num = df_air[numeric_cols]
## Lag Data
lagged_lists = []
for i in [0,1,2,3,4,5,6,18,19]:
lag = df_num.shift(periods=i)
lag.columns = [col+'_{}'.format(i+6) for col in lag.columns]
lagged_lists.append(lag)
df_fin = pd.concat([date]+lagged_lists,axis=1)
df_fin = df_fin.sort_values(by=['day_hour'],ascending=False).reset_index(drop=True)
df_fin.dropna(inplace=True)
df_fin = df_fin.head(6)
## Crear Fechas de Prediccion
max_date = df_air['day_hour'].max()
start_date = max_date+datetime.timedelta(hours=1)
end_date = max_date+datetime.timedelta(hours=6)
predict_dates = pd.date_range(start=str(start_date),end=str(end_date),freq='H').to_list()
df_fin['day_hour']=predict_dates
## Extraer Año Mes Dia Hora de la fecha
df_fin['year']=df_fin['day_hour'].dt.year
df_fin['month']=df_fin['day_hour'].dt.month
df_fin['day']=df_fin['day_hour'].dt.day
df_fin['hour']=df_fin['day_hour'].dt.hour
## Recategorizar año
years = np.linspace(2016,2030,num=13,dtype='int')
years_dict = {}
for i, year in enumerate(years):
years_dict[year]=i+1
df_fin['year']=df_fin['year'].map(years_dict)
print("Data For Model Prediction Ready",end="\n\n")
return df_fin
def get_model(station,variable):
model_name = models.loc[(models['station']==station) & (models['variable']==variable),'model'].values[0]
rf = pickle.load(open('dash/'+model_name, 'rb'))
return rf
def create_plot_data(df,station,variable,lag=20):
print('Preparing Data For {} {} Plotting'.format(station,variable),end="\n\n")
df_air = df[df['station']==station].sort_values(by=['day_hour'],ascending=True).reset_index(drop=True).drop_duplicates(subset='day_hour')
## Seleccionar Ultimas 48 horas de info
end_date = df_air['day_hour'].max()
start_date = end_date-datetime.timedelta(hours=lag)
date_range = pd.date_range(start=str(start_date),end=str(end_date),freq='H').to_list()
df_air = df_air.loc[df_air['day_hour'].isin(date_range),['day_hour',variable]]
df_air['type']='Current'
## Preparar datos de prediccion
df_to_pred = prepare_pred_data(df=df,station=station,variable=variable)
## Cargar Modelo
rf = get_model(station=station,variable=variable[0:4])
## Hacer Prediccion
if variable == 'vsby':
df_to_pred[variable] = rf.predict(X=df_to_pred.drop('day_hour',axis=1))
else:
df_to_pred[variable] = np.exp(rf.predict(X=df_to_pred.drop('day_hour',axis=1)))
df_to_pred = df_to_pred[['day_hour',variable]]
df_to_pred['type']='Prediction'
## Link
df_link = df_to_pred.head(1)
df_link['type']='Prediction'
## Concatenar Data Sets
df_out = pd.concat([df_air,df_link,df_to_pred],ignore_index=True)
return df_out
def get_treshold(station, variable):
tresh = pd.read_csv('dash/treshold.csv')
## Info is in meters convert horizontal to miles and vertical to feet
tresh['value'] = np.where(tresh['variable']=='vsby',tresh['value']/1609.344,tresh['value']*3.28084)
x = tresh.loc[(tresh['station']==station) & (tresh['variable']==variable),'value'].values[0]
return x