-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.py
114 lines (98 loc) · 4.77 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import numpy as np
import scipy as sp
import random as r
import code
import math
import timeit
from pylab import *
class dataset:
    """Load one route's feature/target text files and split them into model sets.

    File locations are derived from ``dataPath``, ``serviceName``,
    ``routeName`` and the set names, as
    ``<dataPath>/<serviceName>_<routeName>_<set>.txt``.

    Parameters
    ----------
    dataPath : str
        Directory holding the input text files read by ``load``.
    resPath : str
        Directory where ``save`` / ``saveYHat`` write their results.
    figPath : str
        Directory where ``visualize`` writes its PNG figures.
    serviceName, routeName : str
        Components of every input and output file name.
    xSet, ySet : str
        Names of the feature file and the target file to load.
    xFeats : sequence of int, optional
        Column indices of the feature matrix to keep. ``None`` or an empty
        sequence keeps every column.
    """

    def __init__(self, dataPath = "/projects/onebusaway/BakerNiedMLProject/data/routefeatures", resPath = "/projects/onebusaway/BakerNiedMLProject/data/modelPredictions", figPath = "/projects/onebusaway/BakerNiedMLProject/figures/predictions", serviceName = "intercitytransit", routeName = "route13", xSet = "dist", ySet = "dev", xFeats = None):
        self.dataPath = dataPath
        self.resPath = resPath
        self.figPath = figPath
        self.serviceName = serviceName
        self.routeName = routeName
        self.xSet = xSet
        # A fresh list per instance: a literal [] default would be shared by
        # every dataset created without an explicit xFeats.
        self.xFeats = [] if xFeats is None else xFeats
        self.ySet = ySet

    def load(self, N_points = -1, validation = False, timeorganized = False, Nparts = 4):
        """Read the data files and split the rows into train/validation/test sets.

        Rows are assigned at random to ``Nparts`` equally sized parts. The last
        part becomes the test set; when ``validation`` is true the part before
        it becomes the validation set; all earlier parts form the training set.

        Parameters
        ----------
        N_points : int
            Number of rows to spread over the parts. ``-1`` uses every row.
            With a smaller value, the surplus rows land in part numbers above
            ``Nparts - 1`` and are therefore left out of every set.
        validation : bool
            Whether to hold out a validation part (``xVal``, ``yVal``, ``tVal``).
        timeorganized : bool
            Accepted for interface compatibility; currently unused.
        Nparts : int
            Number of parts the rows are divided into.
        """
        # Get the data from the files
        self.xFull = np.loadtxt("{}/{}_{}_{}.txt".format(self.dataPath, self.serviceName, self.routeName, self.xSet), dtype=float)
        self.yFull = np.loadtxt("{}/{}_{}_{}.txt".format(self.dataPath, self.serviceName, self.routeName, self.ySet), dtype=float)
        self.times = np.loadtxt("{}/{}_{}_timeglobal.txt".format(self.dataPath, self.serviceName, self.routeName), dtype=float)

        # Limit features
        # if xSet == "allfeats" -> DIST, DISTOLD, LAT, LON, TIMEGLOBAL, DAYOFWEEK, DAYS, TIME, TRIPID, DEV
        if len(self.xFeats) > 0:
            self.xFull = self.xFull[:, self.xFeats]

        # Divide the data into sets for Training, Validation, and Testing
        if N_points == -1:
            self.N = len(self.xFull)
        else:
            self.N = N_points

        # Floor division is essential: it maps each shuffled row index to an
        # integer part number, so the equality tests below can match.
        self.modelSets = np.random.permutation(len(self.xFull)) * Nparts // self.N

        testPart = Nparts - 1
        isTest = self.modelSets == testPart
        inScope = self.modelSets <= testPart
        self.xTest = self.xFull[isTest]
        self.yTest = self.yFull[isTest]
        self.tTest = self.times[isTest]
        self.xScope = self.xFull[inScope]
        self.yScope = self.yFull[inScope]
        self.tScope = self.times[inScope]

        if validation:
            isVal = self.modelSets == Nparts - 2
            isTrain = self.modelSets <= Nparts - 3
            self.xVal = self.xFull[isVal]
            self.yVal = self.yFull[isVal]
            self.tVal = self.times[isVal]
        else:
            isTrain = self.modelSets <= Nparts - 2
        self.xTrain = self.xFull[isTrain]
        self.yTrain = self.yFull[isTrain]
        self.tTrain = self.times[isTrain]

    def save(self, data, model = "model"):
        """Write ``data`` to the results file named after ``model`` and the sets."""
        np.savetxt("{}/{}_{}_{}_{}_{}.txt".format(self.resPath, self.serviceName, self.routeName, model, self.xSet, self.ySet), data)

    # Save the yHat vector with its xs and real ys
    def saveYHat(self, data, model = "model"):
        """Write the test features, true targets and predictions side by side.

        Relies on the module-level ``cmb`` helper to join the three arrays
        column-wise.
        """
        np.savetxt("{}/{}_{}_{}_{}_{}.txt".format(self.resPath, self.serviceName, self.routeName, model, self.xSet, self.ySet), cmb(self.xTest, self.yTest, data))

    # Draws a plot of the data and error
    def visualize(self, yHat, specification="model"):
        """Save one figure per feature showing the data and the prediction error.

        Training points are drawn as blue ``+``, test points as red ``x``, and
        a red segment joins each test target to its prediction ``yHat``.
        ``specification`` becomes part of the figure title and file name.
        Uses the plotting functions brought in by ``from pylab import *``.
        """
        if self.xTrain.ndim == 1:
            # A single feature loads as a flat vector; give it a column axis
            # so the per-feature loop below can index it.
            self.xTrain = self.xTrain.reshape(len(self.xTrain), 1)
            self.xTest = self.xTest.reshape(len(self.xTest), 1)
        for i in range(self.xTrain.shape[1]):
            clf()
            plot(self.xTrain[:, i], self.yTrain, 'b+')
            plot(np.vstack((self.xTest[:, i], self.xTest[:, i])), np.vstack((self.yTest.T, yHat.T)), 'r')
            plot(self.xTest[:, i], self.yTest, 'rx')
            # Label before saving; anything set after savefig never reaches
            # the image and is wiped by the next clf().
            ylabel("Schedule Delay")
            xlabel("Feature {}".format(i))
            title("{} {} {}".format(self.serviceName, self.routeName, specification))
            savefig("{}/{}_{}_{}_feat{}.png".format(self.figPath, self.serviceName, self.routeName, specification, i))
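# Disabled usage sketch, kept for reference only. It predates the dataset
# class: the calls below do not match the current method signatures, and
# `model` and `k` are not defined in this file.
#def main():
#    load()
#    yHat = model(xTrain, yTrain, xTest, yTest)
#    save(np.append())
#    # Visualize and save the images for the model
#    visualize(xTrain, yTrain, xTest, yTest, yHat, "dist_{}NN".format(k))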
def cmb(a, b, c):
    """Join three arrays side by side into one 2-D array.

    Any one-dimensional argument is treated as a single column with as many
    rows as ``a``. The inputs themselves are left untouched: columns are
    built with ``reshape`` instead of assigning to ``.shape``, so the
    caller's arrays keep their original shape.

    Parameters
    ----------
    a, b, c : array_like
        One- or two-dimensional inputs with matching row counts.

    Returns
    -------
    numpy.ndarray
        The columns of ``a``, then ``b``, then ``c``.

    Raises
    ------
    ValueError
        If a one-dimensional input cannot be reshaped to ``a``'s row count,
        or the row counts do not match.
    """
    a = np.asarray(a)
    if a.ndim == 1:
        a = a.reshape(a.shape[0], 1)
    rows = a.shape[0]

    columns = [a]
    for part in (b, c):
        part = np.asarray(part)
        if part.ndim == 1:
            part = part.reshape(rows, 1)
        columns.append(part)
    return np.concatenate(columns, axis=1)
def rmse(y, yhat):
    """Return the root-mean-square error between targets and predictions.

    ``yhat`` is viewed in ``y``'s shape before subtracting, without
    modifying the caller's array. The mean is taken over the first axis, so
    a one-dimensional ``y`` yields a scalar and a two-dimensional ``y``
    yields one value per column.

    Parameters
    ----------
    y : array_like
        True target values.
    yhat : array_like
        Predicted values with the same number of elements as ``y``.

    Raises
    ------
    ValueError
        If ``yhat`` cannot be reshaped to ``y``'s shape.
    ZeroDivisionError
        If the inputs are empty, since the mean of no errors is undefined.
    """
    y = np.asarray(y)
    yhat = np.asarray(yhat).reshape(y.shape)
    if len(y) == 0:
        raise ZeroDivisionError("rmse is undefined for empty input")
    ydiff = y - yhat
    return np.sqrt(np.mean(ydiff * ydiff, axis=0))
class timer:
    """Minimal stopwatch built on ``timeit.default_timer``.

    The reference clock reading is kept in ``self.time``.
    """

    def __init__(self):
        # Start measuring from the moment of construction.
        self.reset()

    def reset(self):
        """Restart the stopwatch from the current clock reading."""
        self.time = timeit.default_timer()

    def dur(self):
        """Return the time elapsed since construction or the last ``reset``."""
        now = timeit.default_timer()
        return now - self.time
if __name__ == "__main__":
    # This module defines no main(); the only sketch of one is commented
    # out. Look the name up defensively so that running the file directly
    # ends with a clear message instead of a NameError.
    entry_point = globals().get("main")
    if entry_point is None:
        raise SystemExit("common.py is a helper module and defines no main(); import it instead.")
    entry_point()