Skip to content

Commit

Permalink
Merge pull request #17 from alonisser/master
Browse files Browse the repository at this point in the history
Faster committee protocol speaker id parsing
  • Loading branch information
alonisser authored Jul 24, 2017
2 parents d78bcb8 + 11f5742 commit ea52212
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 13 deletions.
23 changes: 15 additions & 8 deletions knesset_data_django/committees/protocol_part_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

from functools32 import lru_cache
from knesset_data.protocols.protocol_header_parser import ProtocolHeaderParser
from collections import namedtuple

Expand All @@ -9,19 +9,27 @@
# Immutable record with the fields: meeting, order, header, body, speaker.
ProtocolPartResult = namedtuple('ProtocolPartResult', ['meeting', 'order', 'header', 'body', 'speaker'])


# Builds a dict mapping every person name and every person-alias name to an id.
# NOTE(review): this span is a rendered diff whose +/- markers were stripped, so
# two versions of several lines appear back to back. Presumably the first line
# of each pair is the removed one and the second its replacement -- confirm
# against the commit before relying on either.
# Variant taking MK objects and filtering persons with `mk__in`:
def create_person_name_mk_id_map(mks):
persons = Person.objects.filter(mk__in=mks).select_related()
# Variant taking MK ids and filtering with `mk_id__in`. It is memoised with
# lru_cache, so `mks_ids` must be hashable (e.g. a tuple) and the same dict
# object is handed back on every cache hit -- callers should not mutate it.
# Database changes made after a key is cached are not reflected for that key.
@lru_cache(maxsize=1000)
def create_person_name_mk_id_map(mks_ids):
persons = Person.objects.filter(mk_id__in=mks_ids).select_related()
# Aliases that belong to the persons selected above.
aliases = PersonAlias.objects.filter(person__in=persons).select_related()
persons_map = dict()
# Person names first; the paired lines differ in the stored value
# (`person.mk.id` versus `person.id`).
for person in persons:
persons_map[person.name] = person.mk.id
persons_map[person.name] = person.id

# Alias names are written afterwards, so an alias equal to an existing key
# overwrites that entry.
for alias in aliases:
persons_map[alias.name] = alias.person.mk.id
persons_map[alias.name] = alias.person.id

return persons_map


@lru_cache(maxsize=1000)
def parse_header(mks_ids, header):
    """Return the speaker id parsed from a protocol part header.

    Results are memoised per ``(mks_ids, header)`` pair (up to 1000 entries),
    so both arguments must be hashable; pass ``mks_ids`` as a tuple.

    :param mks_ids: hashable collection of MK ids used to build the
        name -> id lookup via ``create_person_name_mk_id_map``.
    :param header: header text of the protocol part.
    :return: the second element of ``ProtocolHeaderParser(...).parse()``.
    """
    mapped_names_ids = create_person_name_mk_id_map(mks_ids)
    # parse() yields a (header_text, speaker_id) pair; only the id is needed.
    _, speaker_id = ProtocolHeaderParser(header, mapped_names_ids).parse()
    return speaker_id


class CommitteeProtocolPartBuilder(object):
def __init__(self, meeting, order, header, body, mks):
self.mks = mks
Expand All @@ -32,10 +40,9 @@ def __init__(self, meeting, order, header, body, mks):
self.body = body

def build(self):
    """Create the ``ProtocolPart`` described by this builder.

    The speaker id is resolved from the header through ``parse_header``.
    The MK ids are passed as a tuple so the argument is hashable.

    :return: a ``ProtocolPart`` built from the stored meeting, order,
        body and header plus the resolved ``speaker_id``.
    """
    mks_ids = tuple(mk.id for mk in self.mks)
    speaker_id = parse_header(mks_ids, self.header)
    return ProtocolPart(meeting=self.committee_meeting,
                        order=self.order,
                        body=self.body,
                        speaker_id=speaker_id,
                        header=self.header)
6 changes: 1 addition & 5 deletions knesset_data_django/committees/tests/testapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@ def given_party_exists_in_knesset(cls, party_name, knesset):
@classmethod
def given_member_exists_in_knesset(cls, member_name, party, start_date=ten_days_ago.date(), end_date=None):
    """Return the ``Member`` named *member_name*, creating it when missing.

    :param member_name: name used for the lookup or creation.
    :param party: accepted for interface compatibility; not used here.
    :param start_date: start date used for the lookup or creation
        (defaults to ``ten_days_ago.date()``).
    :param end_date: accepted for interface compatibility; not used here.
    :return: the fetched or newly created ``Member``.
    """
    # Pass the caller's start_date through; it was previously ignored in
    # favour of a hard-coded ten_days_ago.date().
    member, _created = Member.objects.get_or_create(name=member_name, start_date=start_date)
    return member

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions knesset_data_django/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Django settings for knesset-data-django testing app
import os, sys
from knesset_data_django import KNESSET_DATA_DJANGO_APPS
import logging
# Configure root logging when the settings module is imported: INFO and above
# go to stdout, each record prefixed with timestamp, logger name, line number
# and level.
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
format="%(asctime)s:%(name)s:%(lineno)d:%(levelname)s:%(message)s")


# Debug mode is switched on; TEMPLATE_DEBUG follows DEBUG.
DEBUG = True
TEMPLATE_DEBUG = DEBUG
Expand Down

0 comments on commit ea52212

Please sign in to comment.