-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnotes.sql
276 lines (254 loc) · 11 KB
/
notes.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
-- generate days
CREATE OR REPLACE VIEW active_days AS
SELECT day FROM
(SELECT to_char(((SELECT min(outages.started_at) - '1 day'::interval FROM
outages)::date + (a ||' days')::interval), 'YYYY-MM-DD') as day from
generate_series(1,3650,1) a) as b
WHERE day::date <= now()::date + '1 day'::interval;
-- On sunday, 8-midnight
-- On saturday, 6-midnight
-- On weekdays, 4-midnight
CREATE MATERIALIZED VIEW bart_biz_hours AS
SELECT
timestamptz (day || open_offset || ' US/Pacific')::timestamp as open,
timestamptz (day || ' 24:00:00 US/Pacific')::timestamp as close,
dow
FROM
(
SELECT
day, dow,
CASE WHEN dow = 0 THEN ' 08:00:00'
WHEN dow = 6 THEN ' 06:00:00'
ELSE ' 04:00:00'
END as open_offset
FROM (SELECT day, date_part('dow'::text, day::date) as dow FROM active_days) as
dows) as offsets;
-- biz hour outages
CREATE VIEW bart_biz_hour_outages AS
SELECT *, (close - open) AS duration FROM
(SELECT outages.id, outages.elevator_id,
GREATEST(bart_biz_hours.open, outages.started_at) AS open,
LEAST(bart_biz_hours.close, outages.ended_at) AS close
FROM outages
JOIN elevators ON outages.elevator_id = elevators.id
CROSS JOIN bart_biz_hours
WHERE
elevators.name NOT LIKE 'Muni%' AND
(outages.started_at, COALESCE(outages.ended_at, NOW())) OVERLAPS (bart_biz_hours.open, bart_biz_hours.close)
) AS t ;
-- avg outage length
-- on 2016-11-13:
-- `select count(*) FROM bart_biz_hour_outages;`: 2598
-- `select count(id) FROM bart_biz_hour_outages group by id;`: 1912
CREATE VIEW bart_biz_hour_outage_length AS
SELECT id, elevator_id, sum(duration) as duration
FROM bart_biz_hour_outages
GROUP BY id, elevator_id;
CREATE OR REPLACE VIEW analytics_bart_biz_hour_outage_length AS
SELECT COALESCE(name, 'ALL') as name, t.*
FROM (SELECT elevator_id, count(elevator_id), avg(duration), max(duration), min(duration)
FROM bart_biz_hour_outage_length
GROUP BY elevator_id
UNION (SELECT NULL as elevator_id, count(*), avg(duration), max(duration), min(duration)
FROM bart_biz_hour_outage_length)) as t
LEFT JOIN elevators
ON elevators.id = elevator_id
ORDER BY avg;
CREATE OR REPLACE VIEW analytics_bart_outage_length AS
SELECT COALESCE(name, 'ALL') as name, t.*
FROM (SELECT elevator_id, count(elevator_id), avg(duration), max(duration), min(duration)
FROM (SELECT elevator_id, (ended_at - started_at) as duration
FROM outages) as u1
GROUP BY elevator_id
UNION (SELECT NULL as elevator_id, count(*), avg(duration), max(duration), min(duration)
FROM (SELECT id, NULL, (ended_at - started_at) as duration
FROM outages) as u2)) as t
LEFT JOIN elevators
ON elevators.id = elevator_id
WHERE name NOT LIKE 'Muni%'
ORDER BY avg;
-- for a given elevator, sum the duration of its outages, and generate a
-- probabilty & percentage for each
CREATE VIEW bart_biz_hour_elevator_outages AS
SELECT elevator_name, elevator_id, station_id, sum,
ROUND((EXTRACT(EPOCH from sum)/EXTRACT(EPOCH FROM total)*100)::decimal, 1) AS percentage,
ROUND((EXTRACT(EPOCH from sum)/EXTRACT(EPOCH FROM total))::decimal, 3) AS probability
FROM
(SELECT elevators.name AS elevator_name, elevators.id AS elevator_id, station_id,
SUM(duration) AS sum
FROM elevators
JOIN bart_biz_hour_outages ON elevator_id = elevators.id
GROUP BY elevators.id
ORDER BY elevators.station_id) AS u
CROSS JOIN (SELECT SUM(close - open) AS total FROM bart_biz_hours) AS t;
-- singleton stations:
CREATE VIEW singleton_station_outages AS
SELECT station_id, stations.name,
probability,
percentage,
'singleton'::text AS station_type
FROM bart_biz_hour_elevator_outages
JOIN stations ON station_id = stations.id
WHERE station_id IN (7, 19, 44, 41, 6, 29, 28, 15, 31, 23, 8, 36, 37, 12, 21);
-- series stations:
CREATE VIEW series_street_outages AS
SELECT station_id, outages.* from outages
JOIN elevators ON outages.elevator_id = elevators.id
WHERE station_id IN (2, 3, 4, 9, 14, 18, 24, 27, 32, 34)
AND name LIKE '%Street%';
CREATE OR REPLACE VIEW series_platform_outages AS
SELECT station_id, outages.* from outages
JOIN elevators ON outages.elevator_id = elevators.id
WHERE station_id IN (2, 3, 4, 9, 14, 18, 24, 27, 32, 34)
AND name NOT LIKE '%Street%';
CREATE VIEW series_overlapping_outages AS
SELECT series_street_outages.station_id,
GREATEST(series_street_outages.started_at,
series_platform_outages.started_at) AS open,
LEAST(series_street_outages.ended_at,
series_platform_outages.ended_at) AS close
FROM series_street_outages
JOIN series_platform_outages
ON series_street_outages.station_id = series_platform_outages.station_id
WHERE (series_street_outages.started_at,
COALESCE(series_street_outages.ended_at, NOW()))
OVERLAPS (series_platform_outages.started_at,
COALESCE(series_platform_outages.ended_at, NOW()));
CREATE VIEW series_overlapping_biz_hour_outages AS
SELECT station_id, SUM(duration)
FROM
(SELECT *, (close - open) AS duration
FROM
(SELECT station_id,
GREATEST(series_overlapping_outages.open, bart_biz_hours.open) AS open,
LEAST(series_overlapping_outages.close, bart_biz_hours.close) AS close
FROM series_overlapping_outages
CROSS JOIN bart_biz_hours
WHERE (series_overlapping_outages.open,
COALESCE(series_overlapping_outages.close, NOW()))
OVERLAPS (bart_biz_hours.open, bart_biz_hours.close)) AS t) AS u
GROUP BY station_id;
CREATE VIEW series_station_outages AS
SELECT station_id, name,
ROUND((EXTRACT(EPOCH from sum)/EXTRACT(EPOCH FROM total))::decimal, 3) AS probability,
ROUND((EXTRACT(EPOCH from sum)/EXTRACT(EPOCH FROM total)*100)::decimal, 1) AS percentage,
'series'::text AS station_type
FROM
(SELECT t.station_id, (sum - overlap_sum) AS sum
FROM (SELECT station_id, SUM(sum)
FROM bart_biz_hour_elevator_outages
WHERE station_id IN (2, 3, 4, 9, 14, 18, 24, 27, 32, 34)
GROUP BY station_id) AS t
JOIN (SELECT station_id, sum AS overlap_sum
FROM series_overlapping_biz_hour_outages) AS u
ON t.station_id = u.station_id) AS v
JOIN stations ON stations.id = station_id
CROSS JOIN (SELECT SUM(close - open) AS total FROM bart_biz_hours) AS w;
-- compare old and new (used to do statistical analysis of series stations, but
-- later improved our approach)
-- SELECT series_station_outages_old.station_id,
-- stations.name,
-- series_station_outages_old.percentage,
-- series_station_outages.percentage,
-- (series_station_outages_old.percentage - series_station_outages.percentage)
-- FROM series_station_outages
-- JOIN series_station_outages2
-- ON series_station_outages_old.station_id = series_station_outages.station_id
-- JOIN stations ON stations.id = series_station_outages_old.station_id;
--CREATE VIEW series_station_outages_old AS
-- SELECT station_id, name,
-- (1 - (1 - p1)*(1 - p2)) AS probability,
-- (1 - (1 - p1)*(1 - p2))*100 AS percentage,
-- 'series'::text AS station_type
-- FROM
-- (SELECT station_id, name,
-- MIN(probability) AS p1,
-- MAX(probability) AS p2
-- FROM bart_biz_hour_elevator_outages
-- JOIN stations ON station_id = stations.id
-- WHERE station_id IN (2, 3, 4, 9, 14, 18, 24, 27, 32, 34)
-- GROUP BY station_id, name) AS probs;
-- parallel stations
CREATE VIEW parallel_station_outages AS
SELECT station_id, name,
AVG(probability) AS probability,
AVG(percentage) AS percentage,
'parallel'::text AS station_type
FROM bart_biz_hour_elevator_outages
JOIN stations ON station_id = stations.id
WHERE station_id IN (11, 16, 17, 20, 22, 25, 33, 39, 40, 42, 43, 45)
GROUP BY station_id, name;
-- outageless stations
CREATE VIEW outageless_stations AS
SELECT id AS station_id, name,
0 AS probability, 0 AS percentage,
'outageless'::text AS station_type
FROM stations
WHERE id NOT IN (SELECT DISTINCT station_id
FROM bart_biz_hour_elevator_outages)
AND name NOT LIKE 'Muni%';
-- special cases
CREATE VIEW ashby_outages AS
SELECT station_id, name, probability, percentage,
'special_case'::text AS station_type
FROM bart_biz_hour_elevator_outages
JOIN stations ON station_id = stations.id
WHERE elevator_id = 43;
-- 12th St
-- average the two street elevators, then treat as a series station
CREATE VIEW twelfth_st_outages AS
SELECT station_id, name,
(1 - (1 - p1)*(1 - p2)) AS probability,
(1 - (1 - p1)*(1 - p2))*100 AS percentage,
'special_case'::text AS station_type
FROM (SELECT station_id, probability AS p1, p2
FROM bart_biz_hour_elevator_outages
CROSS JOIN
(SELECT AVG(probability) AS p2
FROM bart_biz_hour_elevator_outages
WHERE elevator_id IN (1, 4)
GROUP BY station_id) AS street
WHERE elevator_id = 21) as probs
JOIN stations ON station_id = stations.id;
-- UNION all the station-outage tables together
CREATE VIEW station_outages AS
SELECT station_id, name,
station_type,
ROUND(probability, 3) AS probability,
ROUND(percentage, 1) AS percentage
FROM
(SELECT * FROM singleton_station_outages
UNION SELECT * FROM series_station_outages
UNION SELECT * FROM parallel_station_outages
UNION SELECT * FROM outageless_stations
UNION SELECT * FROM ashby_outages
UNION SELECT * FROM twelfth_st_outages
ORDER BY percentage DESC ) AS u;
-- Add some analytics for funsies
CREATE VIEW analytics_station_outages AS
SELECT NULLIF(GREATEST(row_number() OVER () - 2, 0), 0) AS rank,
station_id, name, station_type, probability, percentage
FROM
(SELECT sort_order, station_id, name, station_type, probability, percentage
FROM (SELECT NULL AS station_id, 'AVERAGE'::text AS name, NULL AS station_type,
ROUND(AVG(probability), 3) AS probability,
ROUND(AVG(percentage), 1) AS percentage,
0 AS sort_order
FROM station_outages
UNION SELECT *, 100 AS sort_order FROM station_outages) AS t
UNION SELECT 1 AS sort_order, NULL AS station_id, 'PER TRIP'::text AS name, NULL AS station_type,
ROUND((1 - POW((1 - AVG(probability)), 2)), 3) AS probability,
ROUND((1 - POW((1 - AVG(probability)), 2))*100, 1) AS percentage
FROM station_outages
ORDER BY sort_order ASC, probability DESC) AS u;
-- station-pair analytics
CREATE VIEW analytics_station_pairs AS
SELECT row_number() OVER (), *
FROM (SELECT station_outages.name as name1, u.name as name2,
ROUND(1 - (1 - station_outages.probability)*(1-u.probability),3) AS probability,
ROUND((1 - (1 - station_outages.probability)*(1-u.probability))*100,1) AS percentage
FROM station_outages
CROSS JOIN (SELECT * FROM station_outages) AS u
JOIN stations ON stations.id = station_outages.station_id
WHERE station_outages.station_id < u.station_id
ORDER BY percentage DESC) as t;