-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathcoordinates_states.py
31 lines (28 loc) · 1.06 KB
/
coordinates_states.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from pygeocoder import Geocoder
import us, csv, json
# File modes passed to open() by this script.
WRITE = 'wb'
READ = 'rb'

# Tweet corpus to analyse; expected to be a JSON array of tweet objects.
# Inputs used by earlier runs, kept for reference:
#   'bieber-raw-test.json'
#   'sxsw-SXSW-#SXSW-#sxsw-20140308-001535.json'
CORPUS_PATH = '#birthday-birthday-20140626-082517.json'

# Load through a context manager so the file handle is closed as soon as the
# JSON has been parsed instead of being left open for the garbage collector.
with open(CORPUS_PATH, READ) as corpus_file:
    corpus = json.load(corpus_file)
def get_location(tweet):
    """Return a short location label for *tweet*, or None if there is none.

    Two sources are tried, in this order:

    1. ``tweet['coordinates']`` -- the point is reverse geocoded and, when it
       lies in the United States, the state's postal abbreviation is
       returned.  A tweet that carries coordinates never falls through to
       the ``place`` lookup, even when geocoding yields nothing.
    2. ``tweet['place']`` -- the text after the last comma of the place's
       ``full_name`` is returned (for example ``'TX'`` for ``'Austin, TX'``).

    Args:
        tweet: dict for one tweet.  Missing or empty ``coordinates`` /
            ``place`` entries are treated as "no data".

    Returns:
        The location string, or None when nothing could be determined.
    """
    coordinates = tweet.get('coordinates')
    if coordinates:
        # Tweet coordinates are a GeoJSON point, i.e. [longitude, latitude],
        # while reverse_geocode() takes latitude first.
        lon, lat = tuple(coordinates['coordinates'])
        try:
            location = Geocoder.reverse_geocode(lat, lon)
            # The country component decides whether the point is in the US;
            # administrative_area_level_1 then carries the state itself.
            if location.country == 'United States':
                state = us.states.lookup(location.administrative_area_level_1)
                if state is not None:
                    return state.abbr.strip()
        except Exception:
            # Geocoding is best effort: a failed lookup just means "unknown".
            # Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        return None

    place = tweet.get('place')
    if place:
        return place['full_name'].split(',')[-1].strip()
    return None
# Leftover progress marker, kept so the script's console output is unchanged.
print('Hi')

# Resolve every tweet in the corpus to a location string (or None).
locations = [get_location(tweet) for tweet in corpus]
print([location for location in locations if location])

# get_location() reports US states by postal abbreviation, so the tally has
# to match on state.abbr; matching on the full state name finds nothing and
# leaves every state with a count of zero.  Results stay keyed by full name.
prevalence = {state.name: locations.count(state.abbr)
              for state in us.states.STATES}
print(prevalence)

# One CSV row per state, written in the order of us.states.STATES so that
# repeated runs produce identical files.  The file is opened in binary mode,
# so each row is encoded to bytes before it is written.
with open('prevalence-birthday.csv', WRITE) as f:
    f.write(b'name,prevalence\n')
    for state in us.states.STATES:
        row = '%s,%d\n' % (state.name, prevalence[state.name])
        f.write(row.encode('utf-8'))