class UkParliament::MemberSummaryDocPipeline

Class defining the pipeline process of a scraped member summary document.

Public Class Methods

new(house_id, document) click to toggle source

Initialise the class, calling the parent class init, with provided args.

Calls superclass method UkParliament::DocPipeline::new
# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 5
# Build a pipeline for one scraped member summary document.
#
# house_id - identifier handed unchanged to the superclass initializer.
# document - the scraped document, handed unchanged to the superclass
#            initializer.
#
# All set-up work is delegated to the superclass; nothing is stored here.
def initialize(house_id, document)
  super(house_id, document)
end

Public Instance Methods

enrich_member_data(member) click to toggle source

Produce the member summary.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 10
# Produce the member summary for +member+.
#
# member - the member record the pipeline tasks write into; it is kept in
#          @member so every extraction method can reach it.
#
# Returns whatever +execute+ returns. +execute+ is not defined in this
# class (presumably inherited from the parent pipeline - confirm).
def enrich_member_data(member)
  @member = member
  execute
end

Private Instance Methods

address(node, section_id) click to toggle source

Extract the address value from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 53
# Store the address held by +node+ under the given contact section.
#
# node       - object responding to +content+, or nil when the page has
#              no address element.
# section_id - key of the contact section hash inside @member.
#
# Returns the stored (whitespace-trimmed) address, or nil when +node+ is
# nil, in which case @member is left untouched.
def address(node, section_id)
  return if node.nil?

  @member[section_id]['address'] = node.content.strip
end
commons_member_name() click to toggle source

Build the commons member name fields (title, given name, middle names, surname and full name) from the member's alphabetical name, e.g. "Abbot, Ms Diane".

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 165
# Split the member's alphabetical name ("Surname, Title Given Middle...")
# into its parts and store them under @member['name'].
#
# Keys written: 'full_name', 'given_name' and 'surname' always; 'title'
# only when the first word after the comma is a recognised title;
# 'middle_names' (an Array) only when any names remain.
#
# The surname is taken directly from the text before the first comma, so
# a name with nothing after the comma keeps its surname and gets a nil
# given name, rather than the surname being reported as the given name.
#
# Returns the middle names Array when present, otherwise nil.
def commons_member_name
  section_id = 'name'
  name = @member[section_id] = {}

  titles = %w(Mr Mrs Ms Dr Sir Dame Lady Lord)
  # "Abbot, Ms Diane" -> surname "Abbot", rest [" Ms Diane"]
  surname, *rest = @member['alphabetical_name'].split(',')
  # [" Ms Diane"] -> ["Ms", "Diane"]
  forenames = rest.join.split(' ')
  # ["Ms", "Diane"] -> title "Ms", forenames ["Diane"]
  name['title'] = forenames.shift if titles.include?(forenames.first)

  # compact drops a nil surname (empty input) so the join stays clean.
  name['full_name'] = (forenames + [surname]).compact.join(' ')
  name['given_name'] = forenames.shift
  name['surname'] = surname

  # Whatever is left after removing the given name is middle names.
  name['middle_names'] = forenames unless forenames.empty?
end
constituency_details() click to toggle source

Extract the constituency contact details for a member.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 91
# Collect the constituency contact details for the current member.
#
# Looks up the constituency contact-detail divs in @document; when at
# least one exists, resets @member['constituency_contact'] to an empty
# hash and lets section_contact_details fill it in.
#
# Returns nil when no matching div is present, otherwise the result of
# section_contact_details.
def constituency_details
  matches = @document.xpath("//div[contains(@class, 'contact-detail') and contains(@class, 'constituency')]")
  return if matches.length.zero?

  @member['constituency_contact'] = {}
  section_contact_details('constituency_contact', matches)
end
decode_email(code) click to toggle source

Decode the Cloudflare encoded email address.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 82
# Decode a Cloudflare encoded email address.
#
# code - String of hex digit pairs. The first pair is the XOR key; each
#        following pair is one character of the address XORed with it.
#
# Returns the decoded String (empty when +code+ holds nothing after the
# key).
def decode_email(code)
  key = code[0, 2].hex

  # Walk the remaining pairs two characters at a time.
  2.step(code.size - 1, 2).map { |offset| (code[offset, 2].hex ^ key).chr }.join
end
define_commons_tasks() click to toggle source

Define the tasks that will be performed for the commons member summary pipeline.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 20
# Define the tasks performed for the commons member summary pipeline.
#
# Stores the task names, in order, in @commons_tasks and returns that
# Array. Each name matches a private method of this class; how the list
# is consumed is not visible here (presumably by the parent pipeline -
# confirm).
def define_commons_tasks
  @commons_tasks = %w(
    parliamentary_details
    departmental_details
    constituency_details
    digital_details
    commons_member_name
  )
end
define_lords_tasks() click to toggle source

Define the tasks that will be performed for the lords member summary pipeline.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 26
# Define the tasks performed for the lords member summary pipeline.
#
# Stores the task names, in order, in @lords_tasks and returns that
# Array. Each name matches a private method of this class; how the list
# is consumed is not visible here (presumably by the parent pipeline -
# confirm).
def define_lords_tasks
  @lords_tasks = %w(
    parliamentary_details
    departmental_details
    external_office_details
    digital_details
    lords_member_name
  )
end
departmental_details() click to toggle source

Extract the departmental office contact details for a member.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 113
# Collect the departmental office contact details for the current member.
#
# Looks up the departmental contact-detail divs in @document; when at
# least one exists, resets @member['departmental_contact'] to an empty
# hash and lets section_contact_details fill it in.
#
# Returns nil when no matching div is present, otherwise the result of
# section_contact_details.
def departmental_details
  matches = @document.xpath("//div[contains(@class, 'contact-detail') and contains(@class, 'departmental')]")
  return if matches.length.zero?

  @member['departmental_contact'] = {}
  section_contact_details('departmental_contact', matches)
end
digital_details() click to toggle source

Extract the digital contact details for a member.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 124
# Collect the digital contact details (websites, Twitter, Facebook) for
# the current member.
#
# Finds the 'web-social-media' div in @document, looks up every website
# link plus the first Twitter and first Facebook link beneath it, and
# passes each to its handler (web, twitter, facebook). Handlers receive
# an empty set or nil when the page lacks the corresponding link.
#
# Returns the result of the facebook handler.
def digital_details
  social = @document.xpath("//div[@id = 'web-social-media']")

  website_links = social.xpath(".//*[@data-generic-id = 'website']/a")
  twitter_link = social.at_xpath(".//*[@data-generic-id = 'twitter']/a")
  facebook_link = social.at_xpath(".//*[@data-generic-id = 'facebook']/a")

  web(website_links)
  twitter(twitter_link)
  facebook(facebook_link)
end
email(node, section_id) click to toggle source

Extract email value from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 75
# Store the email address held by +node+ under the given contact section.
#
# node       - element whose 'data-cfemail' attribute carries the encoded
#              address, or nil when the page has no email element.
# section_id - key of the contact section hash inside @member.
#
# Returns the decoded address, or nil when +node+ is nil, in which case
# @member is left untouched.
def email(node, section_id)
  return if node.nil?

  @member[section_id]['email'] = decode_email(node['data-cfemail'])
end
external_office_details() click to toggle source

Extract the external office contact details for a member.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 102
# Collect the external office contact details for the current member.
#
# Looks up the external/private office contact-detail divs in @document;
# when at least one exists, resets @member['external_contact'] to an
# empty hash and lets section_contact_details fill it in.
#
# Returns nil when no matching div is present, otherwise the result of
# section_contact_details.
def external_office_details
  matches = @document.xpath("//div[contains(@class, 'contact-detail') and contains(@class, 'externalprivate-office')]")
  return if matches.length.zero?

  @member['external_contact'] = {}
  section_contact_details('external_contact', matches)
end
facebook(node) click to toggle source

Extract Facebook link value from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 158
# Store the Facebook link held by +node+ on the current member.
#
# node - link element whose 'href' attribute is the Facebook URL, or nil
#        when the page has no Facebook link.
#
# Returns the stored URL, or nil when +node+ is nil, in which case
# @member is left untouched.
def facebook(node)
  return if node.nil?

  @member['facebook'] = node['href']
end
lords_member_name() click to toggle source

Extract a lords member name value from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 193
# Extract the lords member name values from the document and store them
# under @member['name'].
#
# Keys written: 'full_title' when the 'lords-fulltitle' div exists;
# 'full_name', 'given_name' and 'surname' when the 'lords-name' div
# exists; 'middle_names' (an Array) only when names remain between the
# first and last word.
#
# A missing div is skipped rather than raising, matching how the other
# extractors in this class treat absent nodes.
#
# Returns the middle names Array when present, otherwise nil.
def lords_member_name
  section_id = 'name'
  @member[section_id] = {}

  table = @document.xpath("//table[@class = 'personal-details-container']")

  # NOTE(review): both lookups below use absolute '//' paths, so they are
  # not restricted to +table+ - confirm whether './/' was intended.
  full_title = table.at_xpath("//div[@id = 'lords-fulltitle']")
  @member[section_id]['full_title'] = full_title.content.strip unless full_title.nil?

  name = table.at_xpath("//div[@id = 'lords-name']")
  return if name.nil?

  components = name.content.strip.split(' ')
  @member[section_id]['full_name'] = components.join(' ')
  # First word is the given name, last word the surname; anything left
  # in between is treated as middle names.
  @member[section_id]['given_name'] = components.shift
  @member[section_id]['surname'] = components.pop

  @member[section_id]['middle_names'] = components unless components.empty?
end
parliamentary_details() click to toggle source

Extract the parliamentary contact details for a member.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 31
# Collect the parliamentary contact details for the current member.
#
# Looks up the parliamentary contact-detail divs in @document; when at
# least one exists, resets @member['parliamentary_contact'] to an empty
# hash and lets section_contact_details fill it in.
#
# Returns nil when no matching div is present, otherwise the result of
# section_contact_details.
def parliamentary_details
  matches = @document.xpath("//div[contains(@class, 'contact-detail') and contains(@class, 'parliamentary')]")
  return if matches.length.zero?

  @member['parliamentary_contact'] = {}
  section_contact_details('parliamentary_contact', matches)
end
phone_fax(node, section_id) click to toggle source

Extract the phone/fax value(s) from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 60
# Store the telephone (and, when present, fax) number held by +node+
# under the given contact section.
#
# node       - object responding to +content+, or nil when the page has
#              no telephone element.
# section_id - key of the contact section hash inside @member.
#
# Some telephone values include a 'Fax' label/value as well as a 'Tel'
# label/value. The fax label is detected case-insensitively so that it
# agrees with the case-insensitive pattern used to split on it; without
# that, a lower-case "fax:" label leaves the fax number inside the
# telephone value.
#
# Returns the last value stored, or nil when +node+ is nil.
def phone_fax(node, section_id)
  return if node.nil?

  # Collapse every run of whitespace (including newlines) to one space.
  text = node.content.strip.gsub(/\s+/, ' ')

  if text =~ /fax/i
    tel, fax = text.split(/fax:*\s*/i)
    # tel is nil when the text is nothing but a fax label; to_s keeps the
    # telephone an empty string in that case instead of raising.
    @member[section_id]['telephone'] = tel.to_s.gsub(/tel:*\s*/i, '').strip
    @member[section_id]['fax'] = fax
  else
    @member[section_id]['telephone'] = text.sub(/tel:*\s*/i, '')
  end
end
section_contact_details(section_id, nodeset) click to toggle source

Extract the address, telephone/fax and email values for a particular section of contact details. The section's container is created by the caller.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 42
# Extract the address, telephone/fax and email values for one section of
# contact details.
#
# section_id - key of the contact section hash inside @member; the
#              handlers write into it, so it must already exist.
# nodeset    - the matched section element(s) to search beneath.
#
# Each lookup yields the first matching element or nil, and is passed to
# the handler of the same purpose (address, phone_fax, email).
#
# Returns the result of the email handler.
def section_contact_details(section_id, nodeset)
  address_node = nodeset.at_xpath(".//*[@data-generic-id = 'address']")
  phone_node = nodeset.at_xpath(".//*[@data-generic-id = 'telephone']")
  email_node = nodeset.at_xpath(".//*[@data-generic-id = 'email-address']/a/span[@class = '__cf_email__']")

  address(address_node, section_id)
  phone_fax(phone_node, section_id)
  email(email_node, section_id)
end
twitter(node) click to toggle source

Extract Twitter account values from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 148
# Store the Twitter account values held by +node+ on the current member.
#
# node - link element, or nil when the page has no Twitter link. Its
#        'href' attribute becomes the profile and the content of its
#        child becomes the username.
#
# Returns the stored hash ('profile' and 'username' keys), or nil when
# +node+ is nil, in which case @member is left untouched.
def twitter(node)
  return if node.nil?

  profile = node['href']
  username = node.child.content
  @member['twitter'] = { 'profile' => profile, 'username' => username }
end
web(nodeset) click to toggle source

Extract web address value(s) from a document node.

# File lib/uk_parliament/member_summary_doc_pipeline.rb, line 137
# Store the web address of every link in +nodeset+ on the current member.
#
# nodeset - collection of link elements; each element's 'href' attribute
#           is recorded. May be nil or empty.
#
# @member['web'] is replaced with a fresh Array of the hrefs, in order.
# When +nodeset+ is nil or empty nothing is written and nil is returned;
# otherwise the return value is whatever nodeset.each returns.
def web(nodeset)
  return if nodeset.nil? || nodeset.empty?

  links = @member['web'] = []
  nodeset.each { |link| links << link['href'] }
end