class AmazonWishList

Constants

REVEAL_OPTIONS
SORT_OPTIONS

Attributes

id[RW]
wishes[RW]

Public Class Methods

check_for_redirect(url) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 92
# Resolves +url+ by following HTTP 3xx redirects manually, one hop at a time,
# and returns the first URL that does not answer with a redirect.
#
# url            - the URL to start from
# redirect_limit - maximum number of redirect hops to follow (default 10);
#                  bounds the retry so a redirect loop cannot spin forever
#
# Re-raises the RestClient::ExceptionWithResponse when the failure is not a
# redirect, when the redirect carries no Location header, or when the hop
# limit has been used up.
def self.check_for_redirect(url, redirect_limit = 10)
  hops_left = redirect_limit
  begin
    # max_redirects: 0 makes RestClient raise on a redirect instead of
    # following it, so the Location header can be inspected in the rescue
    RestClient::Request.execute(method: :get, url: url, max_redirects: 0)
  rescue RestClient::ExceptionWithResponse => err
    response = err.response
    # the exception may carry no response at all; only 3xx is handled here
    raise unless response && response.code / 100 == 3

    location = response.headers[:location]
    raise if location.nil? || hops_left <= 0

    hops_left -= 1
    url = location
    retry
  end
  url
end
find_lek_from_response(response) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 76
# Extracts the "lastEvaluatedKey" (abbreviated "lek") from a wishlist page.
#
# As of the time of writing this, the lek is used to keep track of what
# portions of the wishlist have already been loaded, and is sent in the query
# string of ajax calls to get the next page.
#
# response - an object whose #body is the page HTML as a String
#
# Returns the text between the 'name="lastEvaluatedKey" value="' marker and
# the following '" class="lastEvaluatedKey"' terminator.
# Raises RuntimeError when the marker does not appear in the body.
def self.find_lek_from_response(response)
  _before, marker, after_marker = response.body.partition('name="lastEvaluatedKey" value="')
  # partition yields an empty separator when the marker was not found
  raise "lastEvaluatedKey not found in response body" if marker.empty?

  after_marker.partition('" class="lastEvaluatedKey"').first
end
get_all_wishlist_pages(url_without_qstring, query_params) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 54
# Fetches every page of a wishlist, following the "lek" continuation key from
# one page to the next, and returns the responses in the order fetched.
#
# url_without_qstring - wishlist URL without any query string
# query_params        - hash of query parameters for the first page; it is
#                       copied, so the caller's hash is never modified
#
# Raises RuntimeError if a page yields the same lek that was just used to
# request it, since that would re-request the same page forever.
def self.get_all_wishlist_pages(url_without_qstring, query_params)
  params = query_params.dup
  responses = []
  loop do
    response = get_wishlist_page(url_without_qstring, params)
    responses << response
    # as of the time this was written, this phrase appears only on the last page
    return responses if response.body.include?("Find a gift")

    next_lek = find_lek_from_response(response)
    raise "wishlist pagination did not advance" if next_lek == params[:lek]

    params[:lek] = next_lek # the rest of the params hash stays the same
  end
end
get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') click to toggle source
# Builds an AmazonWishList by fetching every page of the list and parsing the
# wishes out of the page responses.
#
# amazon_list_id - id of the wishlist, used as the last URL path segment
# reveal         - must be one of REVEAL_OPTIONS, else "invalid reveal" is raised
# sort           - key into SORT_OPTIONS, else "invalid sort" is raised
# tld            - Amazon top-level domain, e.g. 'com'
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # no :lek entry yet, because the first page is requested without one
  first_page_params = { reveal: reveal.to_s, sort_string: sort_string }
  list_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

  responses = get_all_wishlist_pages(list_url, first_page_params)
  AmazonWishList.new(amazon_list_id, AmazonWish.parse_wishes_from_pages(responses))
end

end

# File lib/amazon_wish_miner/amazon_wish_list.rb, line 49
# Builds an AmazonWishList for the given list id: the attributes returned by
# wishlist_attributes are turned into wishes, which are wrapped together with
# the id in a new AmazonWishList.
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  attributes = wishlist_attributes(amazon_list_id, reveal, sort, tld)
  parsed_wishes = AmazonWish.wishes_from_attributes(attributes)
  AmazonWishList.new(amazon_list_id, parsed_wishes)
end
get_wishlist_page(url_without_qstring, query_params) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 66
# Requests a single wishlist page: the query string built from query_params
# by page_query_string is appended to url_without_qstring and the result is
# fetched with RestClient.get, whose response is returned.
def self.get_wishlist_page(url_without_qstring, query_params)
  page_url = url_without_qstring + page_query_string(query_params)
  RestClient.get(page_url)
end
new(id, wishes) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 10
# Stores the given id and wishes on the new instance.
#
# id     - kept in @id
# wishes - kept in @wishes
def initialize(id, wishes)
  @id = id
  @wishes = wishes
end
page_query_string(query_params) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 71
# Builds the query string (including the leading "?") for a wishlist page.
#
# query_params - hash read for :reveal, :sort_string and, optionally, :lek
#
# The lek/type/ajax parameters are appended only when :lek is set, i.e. for
# every page after the first.
def self.page_query_string(query_params)
  query = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
  lek = query_params[:lek]
  query += "&lek=#{lek}&type=wishlist&ajax=true" if lek
  query
end
wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') click to toggle source

The method below only sends one request, so it is less likely to be identified as being used by a scraper.

# File lib/amazon_wish_miner/amazon_wish_list.rb, line 16
# Fetches every page of the given wishlist and returns whatever
# AmazonWish.attributes_from_responses extracts from the page responses.
#
# amazon_list_id - id of the wishlist, used as the last URL path segment
# reveal         - must be one of REVEAL_OPTIONS, else "invalid reveal" is raised
# sort           - key into SORT_OPTIONS, else "invalid sort" is raised
# tld            - Amazon top-level domain, e.g. 'com'
def self.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # no :lek entry yet, because the first page is requested without one
  first_page_params = { reveal: reveal.to_s, sort_string: sort_string }
  list_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

  responses = get_all_wishlist_pages(list_url, first_page_params)
  AmazonWish.attributes_from_responses(responses)
end
wishlist_from_url(url) click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 84
# Loads the wishlist that a wishlist URL points at.
#
# The URL is requested through HTTParty and the list id is taken from the
# request's last_uri (presumably the address after any redirects HTTParty
# followed - confirm against HTTParty's documentation). The id is the first
# path segment after "/wishlist/" that is not "ls"; any query string or
# fragment is dropped before the path is examined.
#
# Raises RuntimeError when no id can be found in the URL.
def self.wishlist_from_url(url)
  final_url = HTTParty.get(url).request.last_uri.to_s
  after_marker = final_url.split('/wishlist/', 2)[1].to_s
  # cut off "?query" / "#fragment" so they cannot end up inside the id
  path = after_marker.split(/[?#]/, 2).first.to_s
  id = path.split('/').find { |segment| !segment.empty? && segment != 'ls' }
  raise "no wishlist id found in #{final_url}" unless id

  get_wishlist(id)
end