class AmazonWish
Attributes
asin[R]
title[R]
Public Class Methods
attributes_from_responses(page_responses)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 92
# Collects the title and href of the first ".a-link-normal" element inside
# every "ul#g-items li" node of each response.
#
# List items that contain no such link, or whose link has no href, are skipped
# (previously they raised NoMethodError on nil). A link without a title
# attribute yields a nil :title.
#
# @param page_responses [Enumerable] documents accepted by Nokogiri::HTML
# @return [Array<Hash>] one { title:, href: } hash per usable list item
def self.attributes_from_responses(page_responses)
  page_responses.each_with_object([]) do |pr, item_attrs|
    Nokogiri::HTML(pr).css('ul#g-items li').each do |li|
      link = li.css('.a-link-normal').first
      # item_from_attributes derives the ASIN from :href, so an entry
      # without one is of no use downstream.
      next unless link && link['href']

      item_attrs << { title: link['title'], href: link['href'] }
    end
  end
end
draps_from_list_items(list_items)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 29
# Reads the "data-reposition-action-params" attribute of every list item and
# returns the id that external_id_from_drap extracts from each one.
#
# @param list_items [Enumerable] objects indexable by attribute name
# @return [Array] one extracted id per list item, in input order
def self.draps_from_list_items(list_items)
  list_items.map do |item|
    external_id_from_drap(item['data-reposition-action-params'])
  end
end
external_id_from_drap(drap)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 36
# Extracts the item id from a "data-reposition-action-params" string.
#
# The "itemExternalId" member is located by name, so it is found wherever it
# appears in the string. The previous comma/colon splitting only worked when
# it was the first member, and leaked a trailing '"}' when the value had no
# '|' and was the last member.
#
# @param drap [String] raw attribute value containing an "itemExternalId" member
# @return [String, nil] the part of the value after its last ':' and before
#   its first '|'; nil when the value is empty
# @raise [ArgumentError] when no "itemExternalId" member is present
def self.external_id_from_drap(drap)
  # the page refers to the ASIN as "itemExternalId"
  match = /"itemExternalId"\s*:\s*"([^"]*)"/.match(drap.to_s)
  raise ArgumentError, "no itemExternalId in #{drap.inspect}" unless match

  # Same slicing as before: drop everything up to the last ':' and
  # everything from the first '|' onwards.
  match[1].split(':').last.to_s.split('|').first
end
get_title_from_page(page)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 81
# Reads a product title from a parsed item page.
#
# The text of the title span is preferred; when it is blank the "content" of
# the <meta name="title"> element is used instead.
#
# @param page [#css] parsed document (item_from_asin passes the result of
#   Nokogiri::HTML)
# @return [String, nil] the title, or nil when neither source is present
#   (previously a missing meta element raised NoMethodError on nil)
def self.get_title_from_page(page)
  # The suffix match is deliberate: CSS selectors are case sensitive and this
  # has to capture e.g. both "productTitle" and "ebookProductTitle".
  span_title = page.css('span[id$="roductTitle"]').children.text.strip
  return span_title unless span_title.empty?

  meta = page.css('meta[name="title"]').first
  meta && meta['content']
end
item_from_asin(asin_arg)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 54
# Downloads the product page for an ASIN and builds an AmazonWish from the
# title found on it.
#
# @param asin_arg [String] ASIN appended to the product URL
# @return [AmazonWish] wish carrying asin_arg and the parsed title
def self.item_from_asin(asin_arg)
  response = RestClient.get('https://www.amazon.com/dp/' + asin_arg)
  page = Nokogiri::HTML(response)
  # The selector used by get_title_from_page is not a typo: css selectors are
  # case sensitive, and we need to capture e.g. both "productTitle" and
  # "ebookProductTitle".
  title = get_title_from_page(page)
  # price = page.css('priceblock_ourprice')
  # TODO: parse prices
  # description = parse_feature_bullets(page.css('div#feature-bullets'))
  # TODO: get description parsing to work for different types of items
  AmazonWish.new(asin_arg, title)
end
item_from_attributes(attr_hash)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 72
# Builds an AmazonWish from a { title:, href: } hash.
#
# The ASIN is taken from the path segment that follows "/dp/" in the href,
# stopping at the next '/', '?' or '#'. Taking the third '/'-separated piece
# unconditionally, as before, returned the wrong segment for hrefs where
# "/dp/" is not the leading path segment (for example an absolute URL) and
# kept a trailing query string. That positional lookup is retained as a
# fallback for hrefs without "/dp/".
#
# @param attr_hash [Hash] hash with :href and :title entries
# @return [AmazonWish]
def self.item_from_attributes(attr_hash)
  href = attr_hash[:href]
  dp_match = %r{/dp/([^/?#]+)}.match(href)
  asin = dp_match ? dp_match[1] : href.split('/')[2]
  AmazonWish.new(asin, attr_hash[:title])
end
list_items_from_response(page_responses)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 20
# Parses every response and gathers all "ul#g-items li" nodes into one flat
# array, in document order per response.
#
# @param page_responses [Enumerable] documents accepted by Nokogiri::HTML
# @return [Array] the matched list item nodes of all responses
def self.list_items_from_response(page_responses)
  page_responses.flat_map do |response|
    Nokogiri::HTML(response).css('ul#g-items li').to_a
  end
end
new(asin, title)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 5
# Stores the ASIN and title this wish is built from.
#
# @param asin [Object] kept in @asin
# @param title [Object] kept in @title
def initialize(asin, title)
  @asin = asin
  @title = title
end
parse_feature_bullets(feature_bullets_div)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 77
# Returns the "ul li" nodes found inside the given feature-bullets element.
#
# @param feature_bullets_div [#css] element to search
# @return [Object] whatever feature_bullets_div.css('ul li') returns
def self.parse_feature_bullets(feature_bullets_div)
  feature_bullets_div.css('ul li')
end
parse_wishes_from_pages(page_responses)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 14
# Turns wishlist page responses into wishes: list items are extracted from the
# responses, ids are extracted from the list items, and the ids are handed to
# wishes_from_asins.
#
# @param page_responses [Enumerable] wishlist page responses
# @return [Object] the result of wishes_from_asins
def self.parse_wishes_from_pages(page_responses)
  wishes_from_asins(
    draps_from_list_items(
      list_items_from_response(page_responses)
    )
  )
end
trim_title(untrimmed_title)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 87
# Strips leading and trailing characters for which TITLE_TRIMMER is truthy.
#
# @param untrimmed_title [String] title to clean up
# @return [String] the title without the matching leading/trailing characters
def self.trim_title(untrimmed_title)
  without_leading = untrimmed_title.chars.drop_while(&TITLE_TRIMMER)
  # Reverse so the trailing run can be dropped from the front, then restore
  # the original order.
  without_trailing = without_leading.reverse.drop_while(&TITLE_TRIMMER)
  without_trailing.reverse.join
end
wishes_from_asins(asins)
click to toggle source
wishes_from_attributes(attr_hash_array)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 68
# Converts each attribute hash into a wish via item_from_attributes.
#
# @param attr_hash_array [Enumerable<Hash>] attribute hashes
# @return [Array] one item_from_attributes result per hash, in input order
def self.wishes_from_attributes(attr_hash_array)
  attr_hash_array.map do |attributes|
    item_from_attributes(attributes)
  end
end
Public Instance Methods
url()
click to toggle source
# File lib/amazon_wish_miner/amazon_wish.rb, line 10
# Product page URL for this wish.
#
# @return [String] "https://www.amazon.com/dp/" followed by @asin
def url
  'https://www.amazon.com/dp/' + @asin.to_s
end