class PollyPhone::Site

Constants

DEFAULT_DESCRIPTION_TYPE

Attributes

main_url[RW]

Public Class Methods

new(file_path) click to toggle source

Create object with configuration for parsing with the following params @name - title of web-site we want to parse @main_url - main url of web-site we want to parse @brands_url - additional path to catalog page where we want to get brand names @brand_link_path - path for Nokogiri search method to find element with href param for brand @brand_name_item - path for Nokogiri at_xpath method to find element with name of brand

(only use when brand name is within another element, otherwise get name of brand link element)

@brand_source - only use when we need another element param to find brand content, f.e. 'alt'

(otherwise use element content)

@model_link_path - path for Nokogiri search method to find element with href param for model @model_image_item - path for Nokogiri at_xpath method to find element with src param for model image @model_name_item - path for Nokogiri at_xpath method to find element with content for model name @spec_path - path for Nokogiri at_xpath method to find container with phone description @spec_type_item - path for Nokogiri at_xpath method to find description type within the container @spec_category_item - path for Nokogiri at method to find description category within the container @spec_body_item - path for Nokogiri at method to find description text within the container @search_page - additional path for main url for web-site search

# File lib/polly_phone/site.rb, line 33
# Builds a site configuration from a YAML file.
#
# The file at +file_path+ is loaded with YAML.load_file, passed through
# symbolize_keys (helper defined elsewhere; presumably converts the keys to
# symbols) and every resulting entry is stored as an instance variable via
# assign_attributes.
#
# @param file_path [String] path to the YAML configuration file
def initialize(file_path)
  assign_attributes(symbolize_keys(YAML.load_file(file_path)))
end

Public Instance Methods

assign_attributes(hash) click to toggle source

set all configuration attributes as instance variables

# File lib/polly_phone/site.rb, line 39
# Stores every entry of +hash+ as an instance variable on this object:
# the key becomes the variable name (prefixed with "@"), the value its content.
#
# @param hash [Hash] attribute names mapped to their values
# @return [Hash] the +hash+ that was passed in
def assign_attributes(hash)
  hash.each_pair do |attr_name, attr_value|
    instance_variable_set("@#{attr_name}", attr_value)
  end
end
brands() click to toggle source

@return [Array]

# File lib/polly_phone/site.rb, line 46
# Fetches the brand catalog page (@main_url + @brands_url) and extracts every
# brand found by @brand_link_path.
#
# The page is opened with URI.open in block form: open-uri's URL support in
# Kernel#open was removed in Ruby 3.0, and the block form closes the
# downloaded stream once it has been parsed.
# NOTE(review): assumes 'open-uri' is already required by this file, as the
# previous open(url) call depended on it too.
#
# @return [Array] list of [brand name, brand link] pairs; the name is passed
#   through clr_str, the link is the "href" of the matched element
def brands
  doc = URI.open(@main_url + @brands_url) { |io| Nokogiri::HTML(io) }
  doc.search(@brand_link_path).map do |link_node|
    link = link_node["href"]
    # when @brand_name_item is set the name lives in a nested element
    name_node = @brand_name_item ? link_node.at_xpath(@brand_name_item) : link_node
    # when @brand_source is set the name is read from that attribute
    # instead of the element content
    name = @brand_source ? name_node[@brand_source] : name_node.content
    [clr_str(name), link]
  end
end
brands_list() click to toggle source
# File lib/polly_phone/site.rb, line 58
# Lists the brand names available on the site.
#
# @return [Array] keys of the name => link hash built from #brands
def brands_list
  links_by_name = brands.to_h
  links_by_name.keys
end
models(brand) click to toggle source
# File lib/polly_phone/site.rb, line 62
# Collects the phone models of one brand.
#
# Looks up the link stored for +brand+ in the pairs returned by #brands,
# appends it to @main_url and hands the resulting URL to #phone_info.
#
# @param brand [String] brand name as returned by #brands
# @return whatever #phone_info returns for the brand page
def models(brand)
  brand_path = brands.to_h[brand]
  phone_info(@main_url + brand_path)
end
models_list(brand) click to toggle source
# File lib/polly_phone/site.rb, line 66
# Lists the model names of one brand.
#
# @param brand [String] brand name passed on to #models
# @return [Array] the :name entry of every model returned by #models
def models_list(brand)
  phones = models(brand)
  phones.map { |phone| phone[:name] }
end
page_parser(doc) click to toggle source

@return [Array]

# File lib/polly_phone/site.rb, line 87
# Extracts the phone models listed on one parsed page.
#
# @param doc parsed document responding to +search+
# @return [Array] one hash per element matched by @model_link_path with the
#   keys :name (content of @model_name_item), :img and :short ("src" and
#   "title" of @model_image_item) and :link ("href" of the matched element)
def page_parser(doc)
  doc.search(@model_link_path).map do |link_node|
    # swap every <br> for a space before reading text (presumably so the
    # text on either side of a line break does not run together)
    link_node.css('br').each { |line_break| line_break.replace(" ") }

    model_name = link_node.at_xpath(@model_name_item).content
    image_node = link_node.at_xpath(@model_image_item)

    {
      name: model_name,
      img: image_node['src'],
      short: image_node['title'],
      link: link_node["href"]
    }
  end
end
phone_desc(url) click to toggle source

@return [Hash]

# File lib/polly_phone/site.rb, line 98
# Fetches a phone page (@main_url + url) and collects its specification.
#
# Every element matched by @spec_path is inspected: when it contains a
# @spec_type_item, that element's content becomes the current description
# type for this and the following elements (DEFAULT_DESCRIPTION_TYPE is used
# until the first one is seen). When both @spec_category_item and
# @spec_body_item are present, their contents are stored as a
# category => text entry under the current type.
#
# The page is opened with URI.open in block form: open-uri's URL support in
# Kernel#open was removed in Ruby 3.0, and the block form closes the
# downloaded stream once it has been parsed.
# NOTE(review): assumes 'open-uri' is already required by this file, as the
# previous open(url) call depended on it too.
#
# @param url [String] path of the phone page, appended to @main_url
# @return [Hash] description type => { category => text }
def phone_desc(url)
  type = DEFAULT_DESCRIPTION_TYPE
  doc = URI.open(@main_url + url) { |io| Nokogiri::HTML(io) }
  doc.search(@spec_path).each_with_object({}) do |item, desc|
    type_node = item.at_xpath(@spec_type_item)
    # a type heading applies to this row and to all rows after it
    type = type_node.content if type_node
    desc[type] ||= {}
    category = item.at(@spec_category_item)
    body = item.at(@spec_body_item)
    desc[type][category.content] = body.content if category && body
  end
end
phone_info(url) click to toggle source

@return [Array]

# File lib/polly_phone/site.rb, line 75
# Collects the phone models from a listing page and all of its follow-up pages.
#
# The page at +url+ is parsed with #page_parser; as long as the current page
# contains an element matching @next_page, the page at @main_url + that
# element's "href" is fetched and its models are appended.
#
# Pages are opened with URI.open in block form: open-uri's URL support in
# Kernel#open was removed in Ruby 3.0, and the block form closes each
# downloaded stream once it has been parsed.
# NOTE(review): assumes 'open-uri' is already required by this file, as the
# previous open(url) call depended on it too.
#
# @param url [String] full URL of the first listing page
# @return [Array] results of #page_parser for every visited page, in order
def phone_info(url)
  doc = URI.open(url) { |io| Nokogiri::HTML(io) }
  phones = page_parser(doc)
  # look the next-page link up once per page and reuse it for the URL
  while (next_link = doc.at(@next_page))
    doc = URI.open(@main_url + next_link["href"]) { |io| Nokogiri::HTML(io) }
    phones.concat(page_parser(doc))
  end
  phones
end