-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsummarize-activity.py
117 lines (95 loc) · 4.04 KB
/
summarize-activity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
from __future__ import division
import numpy as np
from datetime import datetime, date
from dateutil import parser
from collections import defaultdict
import math
import time
import calendar
import shelve
import pickle
# (year, month, day) dates that count as days off even when they fall on
# Monday-Friday; TimeBlocksCounter.is_weekday() consults this set.
holidays = {
    (2013, 5, 17),  # Buddha's birthday
    (2013, 5, 18),  # oops? (original author's note; reason not recorded)
    (2013, 6, 6),   # Memorial day
}
# Both shelves are opened read-only at import time.
# placedb is looked up by the string form of a place id and returns that
# place's category; storylinedb holds the stored storyline records.
placedb = shelve.open('placecategory.db', flag='r')
storylinedb = shelve.open('storyline.db', flag='r')
# Maps the activity code found in a 'move' segment to the category name used
# in the summary. Running is counted together with walking.
activity2category = dict(
    trp='transport',
    wlk='walk',
    run='walk',
)
def digest_storyline():
    """Flatten every stored storyline into ``(category, start, end)`` tuples.

    Walks all values of ``storylinedb``. Records whose ``segments`` entry is
    ``None`` are skipped. For each remaining segment:

    * ``move``  -- one tuple per entry in the segment's ``activities`` list
      (an absent list counts as empty); the category comes from
      ``activity2category``.
    * ``place`` -- one tuple for the segment; the category comes from
      ``placedb`` keyed by ``str(place id)``. A segment whose lookup raises
      ``KeyError`` is silently dropped.

    ``start`` and ``end`` are whatever ``parser.parse`` returns for the
    ``startTime`` / ``endTime`` strings.

    Raises:
        ValueError: a segment has a type other than ``move`` or ``place``.
        KeyError: a move activity code is missing from ``activity2category``.
    """
    for record in storylinedb.itervalues():
        segments = record[u'segments']
        if segments is None:
            continue

        for seg in segments:
            kind = seg[u'type']

            if kind == u'move':
                for act in seg.get(u'activities', []):
                    began = parser.parse(act[u'startTime'])
                    ended = parser.parse(act[u'endTime'])
                    yield (activity2category[act[u'activity']], began, ended)

            elif kind == u'place':
                began = parser.parse(seg[u'startTime'])
                ended = parser.parse(seg[u'endTime'])
                try:
                    place_category = placedb[str(seg[u'place'][u'id'])]
                except KeyError:
                    # Unnamed place: it has no category, so leave it out.
                    # To hunt these down, dump `seg` (e.g. with pprint) and
                    # re-raise here for stays longer than about 300 seconds.
                    continue
                yield (place_category, began, ended)

            else:
                raise ValueError('Unknown type of segment: ' + kind)
class TimeBlocksCounter(object):
    """Accumulate, per time-of-day block, the seconds spent on each activity.

    A day is cut into blocks of ``timeunit`` seconds. ``update`` spreads an
    activity's duration over the blocks it overlaps (weekdays only) and
    ``get_result`` converts the accumulated seconds into per-block fractions.
    """

    def __init__(self, timeunit, timezone):
        """Create an empty counter.

        Args:
            timeunit: block length in seconds.
            timezone: number of seconds subtracted from a timestamp before
                its block number is computed (the script passes
                ``time.timezone``).
        """
        self.timeunit = timeunit
        self.timezone = timezone
        # Blocks per day; a float because this module uses true division.
        self.timeunit_a_day = 24 * 60 * 60 / timeunit
        # blocks[block number][activity] -> accumulated seconds
        self.blocks = defaultdict(lambda: defaultdict(float))

    def divide_time(self, start, end):
        """Split the interval ``start``..``end`` at block boundaries.

        Args:
            start, end: datetime objects.

        Yields:
            ``(chunk_start, chunk_end)`` pairs of epoch seconds. The first
            chunk runs from ``start`` to the next multiple of ``timeunit``
            (or to ``end`` if that comes first); the following chunks are at
            most ``timeunit`` long, the last one being cut at ``end``.
        """
        # utctimetuple() normalises an aware datetime to UTC before timegm()
        # interprets the fields as UTC. timetuple() would pass the wall-clock
        # fields of the datetime's own offset and shift the timestamp by that
        # offset. For naive or UTC datetimes both give the same result.
        start_ts = calendar.timegm(start.utctimetuple())
        end_ts = calendar.timegm(end.utctimetuple())

        secondblkstart = math.ceil(start_ts / self.timeunit) * self.timeunit
        if secondblkstart >= end_ts:
            # The whole interval fits inside a single block.
            yield (start_ts, end_ts)
            return

        yield (start_ts, secondblkstart)
        for blkstart in np.arange(secondblkstart, end_ts, self.timeunit):
            yield (blkstart, min(end_ts, blkstart + self.timeunit))

    def update(self, starttime, endtime, activity):
        """Add the time between ``starttime`` and ``endtime`` to ``activity``.

        Chunks that start on a weekend or on a date listed in ``holidays``
        are ignored, as are chunks with no positive duration.
        """
        for blkstart, blkend in self.divide_time(starttime, endtime):
            duration = blkend - blkstart
            if duration <= 0:
                # Empty or inverted chunk: it carries no time, and recording
                # it would leave a block whose total is zero.
                continue
            if not self.is_weekday(blkstart):
                continue
            blkno = int(int((blkstart - self.timezone) / self.timeunit)
                        % self.timeunit_a_day)
            self.blocks[blkno][activity] += duration

    def get_result(self):
        """Return ``(result, allactivities)``.

        ``result`` is a list with one ``(block number, fractions)`` entry per
        block of the day, where ``fractions`` maps each activity to its share
        of the seconds recorded in that block (empty when nothing, or only
        zero seconds, was recorded). ``allactivities`` is the set of every
        activity name present in those blocks.
        """
        allactivities = set()
        result = []
        for i in range(24 * 60 * 60 // self.timeunit):
            # .get() avoids inserting empty blocks into the defaultdict.
            block = self.blocks.get(i, {})
            totalsec = sum(block.values())
            if totalsec:
                blkresult = dict((activity, sec / totalsec)
                                 for activity, sec in block.items())
            else:
                # Nothing measurable in this block; dividing would raise
                # ZeroDivisionError.
                blkresult = {}
            allactivities.update(block)
            result.append((i, blkresult))
        return result, allactivities

    def is_weekday(self, ts):
        """Tell whether epoch timestamp ``ts`` falls on a working day.

        The date is taken from ``time.localtime``, i.e. the process's local
        time zone, not ``self.timezone``. Monday-Friday dates that are not in
        the module-level ``holidays`` set count as working days.
        """
        localtime = time.localtime(ts)
        if localtime.tm_wday >= 5:
            return False
        ymd = (localtime.tm_year, localtime.tm_mon, localtime.tm_mday)
        return ymd not in holidays
if __name__ == '__main__':
    # Length of one time-of-day block, in seconds.
    TIMEUNIT = 300

    # Drop duplicate (category, start, end) tuples, then order them by
    # start time, end time and category.
    storyline = sorted(set(digest_storyline()), key=lambda x: (x[1], x[2], x[0]))

    timecounter = TimeBlocksCounter(TIMEUNIT, time.timezone)
    for category, starttime, endtime in storyline:
        timecounter.update(starttime, endtime, category)

    # Compute the summary before touching the output file so that a failure
    # here does not leave a truncated file behind.
    summary = timecounter.get_result()

    # Binary mode is what pickle expects, and the context manager makes sure
    # the file is flushed and closed.
    with open('summarized-weekdays-life.pickle', 'wb') as out:
        pickle.dump(summary, out)