From c859362595ff97097407488589753c3701a232b5 Mon Sep 17 00:00:00 2001 From: Oliver Denman Date: Tue, 2 May 2017 12:31:58 +0100 Subject: [PATCH] fixup! Extract MemberPage --- lib/member_page.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/member_page.rb b/lib/member_page.rb index 78ba43c34..412334dee 100644 --- a/lib/member_page.rb +++ b/lib/member_page.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'scraped' -require 'pry' # This class represents the profile page of a given member class MemberPage < Scraped::HTML @@ -24,7 +23,7 @@ class MemberPage < Scraped::HTML end field :honorific_suffix do - name_parts[1..-1].map(&:tidy).join(', ') + name_parts.drop(1).map(&:tidy).join(', ') end field :gender do @@ -32,15 +31,12 @@ class MemberPage < Scraped::HTML end field :faction do - f = bio.xpath('//p[contains(.,"Political affiliation")]/'\ - 'following-sibling::ul[not(position() > 1)]/li/text()') - return 'Independent' if f.empty? - + return 'Independent' if (affiliation = political_affiliation).empty? # Some member pages list more than one group affiliation for that member # Here, we remove affiliations with known non-party groups - f.map(&:to_s).map(&:tidy).find do |party| - !non_party_groups.to_s.include? party - end + affiliation.map(&:to_s).map(&:tidy).reject do |party| + non_party_groups.to_s.include? party + end.first end field :email do @@ -69,8 +65,7 @@ class MemberPage < Scraped::HTML end field :area do - # splitting here by en-dash (not hyphen) - area_parts.last.split('–').last.tidy + area_parts.last.split("\u{2013}").last.tidy end field :area_type do @@ -91,7 +86,7 @@ def area_parts end def name_parts - bio.css('h2').text.split(',') + bio.css('h2').text.split(',').map(&:tidy) end def titles @@ -103,10 +98,15 @@ def bio end def non_party_groups - [ + Set[ 'Kowloon West New Dynamic', 'New Territories Association of Societies', 'April Fifth Action', ] end + + def political_affiliation + bio.xpath('//p[contains(.,"Political affiliation")]/'\ + 'following-sibling::ul[not(position() > 1)]/li/text()') + end end