forked from openelections/openelections-data-ny
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnyc_parser.py
102 lines (91 loc) · 3.38 KB
/
nyc_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import argparse
import csv
import re
import sys
def _parse_bool_flag(text):
    """Interpret the value given to ``--print_header`` as a boolean.

    ``type=bool`` would call ``bool()`` on the raw string, so every non-empty
    value -- including ``False`` and ``0`` -- would turn the header on.  Only
    the explicit "off" spellings below map to False; any other value is
    True, exactly as before.
    """
    return text.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


arg_parser = argparse.ArgumentParser(description='Parse nyc voting csv\'s.')
arg_parser.add_argument('csvfilepath', type=str, nargs=1)
arg_parser.add_argument('--print_header', type=_parse_bool_flag, default=False)
args = arg_parser.parse_args()

# precinct label ("ED/AD") -> {candidate-column label -> dict of row fields}.
precinct_to_data = {}

# Labels in the candidate column that are ballot-category totals rather than
# candidates.  Their vote counts are appended to every output row, in this
# order.
other_vote_titles = [
    'Public Counter',
    'Emergency',
    'Absentee/Military',
    'Federal',
    'Affidavit',
    'Scattered',
    'Manually Counted Emergency',
    'Special Presidential',
]

# Office names as they appear in the input -> names used in the output.
office_mapping = {
    'United States Senator': 'U.S. Senate',
    'Representative in Congress': 'U.S. House',
    'State Senator': 'State Senate',
    'Member of the Assembly': 'State Assembly',
    'President/Vice President': 'President',
}

if args.print_header:
    # One trailing column per entry of other_vote_titles.  The 'Scattered'
    # column was missing, which left the header one column short of the data
    # rows and mislabelled the last two columns.
    print('county,precinct,office,district,party,candidate,votes,'
          'public_counter_votes,emergency_votes,absentee_military_votes,'
          'federal_votes,affidavit_votes,scattered_votes,'
          'manually_counted_emergency,special_presidential')

# The csv module needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3.
if sys.version_info[0] < 3:
    open_kwargs = {'mode': 'rb'}
else:
    open_kwargs = {'mode': 'r', 'newline': ''}

with open(args.csvfilepath[0], **open_kwargs) as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    next(reader, None)  # the first row is always skipped (header row)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they carry no data and
            # would otherwise break the 11-column unpacking below.
            continue
        (ad, ed, county, edad_status, _, party, office, district, _,
         candidate, votes) = row
        if not party:
            # No party column value: fall back to a trailing "(...)" suffix
            # in the candidate field, when there is one.
            match = re.search(r'^(.*) \((.*)\)$', candidate)
            if match:
                party = match.group(2)
        precinct = '%s/%s' % (ed, ad)
        # A later row with the same candidate label in the same precinct
        # replaces the earlier one.
        precinct_to_data.setdefault(precinct, {})[candidate] = {
            'county': county,
            'party': party,
            'office': office,
            'district': district,
            'votes': votes,
            'status': edad_status,
        }
def _contest_fields(entry):
    """Return (county, party, office, district) of a row dict, normalized for output."""
    office = entry['office']
    office = office_mapping.get(office, office)
    district = entry['district']
    # Non-numeric districts are emitted as an empty column.
    district = int(district) if district.isdigit() else ''
    return entry['county'], entry['party'], office, district


def print_precinct(precinct, data):
    """Print one comma-separated output row per candidate of a precinct.

    Each row is
    ``county,precinct,office,district,party,candidate,votes`` followed by one
    column per entry of ``other_vote_titles`` (empty when the precinct has no
    such entry).

    Args:
        precinct: Precinct label written to the second column.
        data: Mapping of candidate-column label -> dict with the string
            fields 'county', 'party', 'office', 'district' and 'votes'.
            Labels listed in ``other_vote_titles`` are treated as
            ballot-category totals, every other label as a candidate.

    Raises:
        ValueError: If a candidate's vote count is not an integer once
            thousands separators are removed.

    NOTE: fields are joined without CSV quoting, so a comma inside a field
    shifts the columns of that row.
    """
    # The ballot-category columns are the same for every row of the precinct.
    # Thousands separators are stripped, as they are for candidate votes;
    # otherwise a value such as "1,234" would split into two columns.
    category_columns = []
    for title in other_vote_titles:
        tally = data.get(title)
        category_columns.append(tally['votes'].replace(',', '') if tally else '')
    suffix = ''.join(',' + column for column in category_columns)

    # Walk the dict itself rather than a set difference so rows follow the
    # dict's own order instead of set order.
    category_titles = set(other_vote_titles)
    for candidate in [label for label in data if label not in category_titles]:
        entry = data.get(candidate)
        votes = 0
        if entry:
            # Drop a trailing "(...)" suffix from the candidate label.
            match = re.search(r'^(.*) \((.*)\)$', candidate)
            if match:
                candidate = match.group(1)
            votes = int(entry['votes'].replace(',', ''))
        else:
            # Empty entry: borrow the contest fields from another row of the
            # precinct and report zero votes.  dict views cannot be indexed
            # on Python 3, hence next(iter(...)).
            entry = next(iter(data.values()))
        county, party, office, district = _contest_fields(entry)
        row = '%s,%s,%s,%s,%s,%s,%d' % (
            county, precinct, office, district, party, candidate, votes)
        print(row + suffix)
# Emit rows only for precincts whose entry for the first ballot-category
# title (other_vote_titles[0]) is marked IN-PLAY.
for precinct, data in precinct_to_data.items():
    if data[other_vote_titles[0]]['status'] != 'IN-PLAY':
        continue
    print_precinct(precinct, data)