Fix votes shown in output CSV

The logic that prepared a `Result` object for CSV output was incorrectly setting the votes to the total votes for each candidate instead of the votes for each candidate in each jurisdiction. Include the vote type in the CSV results. Reorder CSV columns to match examples in http://docs.openelections.net/data-entry/. Add logging in the `Parser` class and an option in the `results` command to make it easier to identify problems with parsing or application logic. Addresses openelections#18.
ghing · Jul 2, 2017 · 205ba06 · 205ba06
1 parent 4678010
commit 205ba06
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 16 deletions.
diff --git a/clarify/cli/results.py b/clarify/cli/results.py
@@ -1,6 +1,7 @@
 import errno
 import hashlib
 from itertools import chain
+import logging
 import os
 import shutil
 import sys
@@ -74,12 +75,15 @@ def result_as_dict(result, **addl_cols):
     #  it's in the data).
 
     if result.jurisdiction is not None:
-        result_dict['jurisdiction'] = result.jurisdiction.name
+        jurisdiction_key = result.jurisdiction.level
+        result_dict[jurisdiction_key] = result.jurisdiction.name
 
     if result.choice is not None:
         result_dict['candidate'] = result.choice.text
         result_dict['party'] = result.choice.party
-        result_dict['votes'] = result.choice.total_votes
+
+    result_dict['votes'] = result.votes
+    result_dict['vote_type'] = result.vote_type
 
     return result_dict
 
@@ -96,24 +100,27 @@ def get_results(paths):
     return chain.from_iterable(get_results_from_file(unzip(path)) for path in paths)
 
 
-
-
 def add_parser(subparsers):
     parser = subparsers.add_parser('results',
         description="Fetch election results as CSV from from a Clarity system")
     parser.add_argument('results_url',
             help="URL for the main results page for the election")
     parser.add_argument('--cachedir', default=None,
             help="Location of directory where files will be downloaded. By default, a temporary directory is created")
+    parser.add_argument('--log', default=None)
     parser.set_defaults(func=main)
 
     return parser
 
 
 def main(args):
-    # TODO: We need to have some kind of subjurisdiction selection because the
-    # script just takes too long to run otherwise
-    # BOOKMARK
+    if args.log is not None:
+        numeric_level = getattr(logging, args.log.upper(), None)
+        if not isinstance(numeric_level, int):
+            raise ValueError("Invalid log level: {level}".format(
+                level=args.log))
+
+        logging.basicConfig(level=numeric_level)
 
     cache_path = args.cachedir
     temporary_cache_dir = False
@@ -131,18 +138,34 @@ def main(args):
     results_iter = get_results(fetch_urls(get_report_urls([base_jurisdiction]),
         cache_path))
 
+    # We want the fields in the output CSV, and their order to match those
+    # in the data entry instructions for OpenElex
+    # (http://docs.openelections.net/data-entry/#instructions).
     fieldnames = [
-        'jurisdiction',
         'office',
         'candidate',
         'party',
-        'votes',
     ]
 
+    # Now add in the jurisdiction columns.  Unlike the  examples in the
+    # docs, we'll also add a state column, useful if you're slamming results
+    # into a single database.
     addl_cols = {}
     for level in levels:
         addl_cols[level['level']] = level['name']
-        fieldnames = [level['level']] + fieldnames
+        fieldnames.append(level['level'])
+
+    if lowest_level == 'county':
+        # URL indicates county, which means we'll have results for each
+        # precinct
+        fieldnames.append('precinct')
+
+
+    # Votes go last
+    fieldnames += [
+        'votes',
+        'vote_type',
+    ]
 
     writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
     writer.writeheader()

diff --git a/clarify/parser.py b/clarify/parser.py
@@ -1,5 +1,6 @@
 import datetime
 from collections import namedtuple
+import logging
 
 import dateutil.parser
 from lxml import etree
@@ -143,7 +144,7 @@ def _parse_result_jurisdictions(self, tree):
         result_jurisdictions = []
         precinct_els = tree.xpath('/ElectionResult/VoterTurnout/Precincts/Precinct')
         for el in precinct_els:
-            result_jurisdictions.append(ResultJurisdiction(
+            jurisdiction = ResultJurisdiction(
               name=el.attrib['name'],
               total_voters=int(el.attrib['totalVoters']),
               ballots_cast=int(el.attrib['ballotsCast']),
@@ -153,10 +154,15 @@ def _parse_result_jurisdictions(self, tree):
               precincts_reported=None,
               precincts_reporting_percent=None,
               level='precinct'
-            ))
+            )
+            logging.debug('Parsed {level} jurisdiction "{name}"'.format(
+                level=jurisdiction.level, name=jurisdiction.name))
+
+            result_jurisdictions.append(jurisdiction)
+
         county_els = tree.xpath('/ElectionResult/ElectionVoterTurnout/Counties/County')
         for el in county_els:
-            result_jurisdictions.append(ResultJurisdiction(
+            jurisdiction = ResultJurisdiction(
               name=el.attrib['name'],
               total_voters=int(el.attrib['totalVoters']),
               ballots_cast=int(el.attrib['ballotsCast']),
@@ -166,7 +172,11 @@ def _parse_result_jurisdictions(self, tree):
               precincts_reported=float(el.attrib['precinctsReported']),
               precincts_reporting_percent=float(el.attrib['precinctsReportingPercent']),
               level='county'
-            ))
+            )
+            logging.debug('Parsed {level} jurisdiction "{name}"'.format(
+                level=jurisdiction.level, name=jurisdiction.name))
+            result_jurisdictions.append(jurisdiction)
+
         return result_jurisdictions
 
     @property
@@ -279,6 +289,7 @@ def _parse_contest(self, contest_el, result_jurisdiction_lookup):
            counties_reported=self._get_attrib(contest_el, 'countiesReported', int),
            counties_participating=self._get_attrib(contest_el, 'countiesParticipating', int)
         )
+        logging.debug('Parsed contest "{text}"'.format(text=contest.text))
 
         for r in self._parse_no_choice_results(contest_el, result_jurisdiction_lookup, contest):
             contest.add_result(r)
@@ -374,6 +385,11 @@ def _parse_choice(self, contest_el, contest, result_jurisdiction_lookup):
             party=party,
             total_votes=contest_el.attrib['totalVotes'],
         )
+        logging.debug('Parsed choice "{text}" ({party}) with {total_votes} votes'.format(
+            text=choice.text,
+            party=choice.party,
+            total_votes=choice.total_votes
+        ))
 
         for vt_el in contest_el.xpath('./VoteType'):
             vote_type = vt_el.attrib['name']
@@ -387,13 +403,23 @@ def _parse_choice(self, contest_el, contest, result_jurisdiction_lookup):
 
             for subjurisdiction_el in vt_el.xpath('./Precinct') + vt_el.xpath('./County'):
                 subjurisdiction = result_jurisdiction_lookup[subjurisdiction_el.attrib['name']]
-                choice.add_result(Result(
+                result = Result(
                     contest=contest,
                     vote_type=vote_type,
                     jurisdiction=subjurisdiction,
                     votes=int(subjurisdiction_el.attrib['votes']),
                     choice=choice
-                ))
+                )
+                logging.debug(
+                    'Parsed "{vote_type}" result for "{choice}" in '
+                    '"{jurisdiction}" with {votes} votes'.format(
+                        vote_type=result.vote_type,
+                        choice=choice.text,
+                        jurisdiction=subjurisdiction.name,
+                        votes=result.votes))
+
+                choice.add_result(result)
+
         return choice
 
     @classmethod