class AmazonBook::Scraper

Constants

BASE_PATH

Public Class Methods

scrape_book_page(url)
# File lib/AmazonBook/scraper.rb, line 18
# Scrapes one book detail page and copies the scraped values onto the
# AmazonBook::Book whose +url+ equals the argument.
#
# @param url [String] address of the book page to fetch
# @return [Hash] the scraped values under :author, :review, :price and
#   :availability, e.g. {:author => "JK Rowling", :review => "4.9 out of 5.0", ...};
#   :review is nil when the rating node is missing and :price is nil when the
#   price text has fewer than two whitespace-separated tokens
def self.scrape_book_page(url)
  book = AmazonBook::Book.all.find { |candidate| candidate.url == url }

  # Since Ruby 3.0 open-uri no longer patches Kernel#open, so a bare open(url)
  # cannot fetch URLs any more; URI.open works on old and new Rubies alike.
  # The block form closes the downloaded stream once Nokogiri has parsed it.
  # NOTE(review): url is used unvalidated and URI.open still falls back to
  # Kernel#open for non-URL strings -- confirm callers only pass trusted URLs.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }

  # NodeSet#attr returns nil when nothing matched, so look the attribute up
  # first instead of calling .value on a possibly missing node.
  review_title = doc.css("#dp-container #centerCol #averageCustomerReviews_feature_div #averageCustomerReviews span span").attr("title")

  book_hash = {
    author: doc.css("#dp-container #centerCol #booksTitle #byline a").css(".contributorNameID").children.text,
    review: review_title&.value,
    # Second token of the first swatch's text; per the original author this is
    # the price in the left-most box, usually the Kindle edition.
    price: doc.css("#dp-container #centerCol #MediaMatrix #tmmSwatches ul li").css("a > span").text.split[1],
    # Drop newlines and collapse runs of spaces in the availability text.
    availability: doc.css(".en_US #rightCol #buybox #availability").css("span").children.text.gsub("\n", "").squeeze(" ")
  }
  # TODO: book_hash[:publisher] is not scraped yet.

  # No Book may be registered for this URL; still hand the scraped data back.
  book.add_attributes(book_hash) if book
  book_hash
end
scrape_list_page()
# File lib/AmazonBook/scraper.rb, line 6
# Parses the saved book-list fixture and returns one hash per list entry.
#
# The fixture path is relative, so it is resolved against the process's
# current working directory.
#
# @return [Array<Hash>] one {:name, :url} hash per ".zg_itemWrapper" node, e.g.
#   [{:name => "Harry Potter", :url => "www.harrypotter.com"},
#    {:name => "Winnie the Pooh", :url => "www.pooh.com"}]
def self.scrape_list_page
  # Block form closes the fixture's file handle as soon as parsing finishes;
  # Nokogiri still receives an IO, exactly as it did before.
  doc = File.open("./././fixtures/booklist.html") { |fixture| Nokogiri::HTML(fixture) }

  doc.css("#zg_centerListWrapper .zg_itemWrapper").map do |item|
    {
      # Title comes from the alt attribute of the first node inside "a div".
      name: item.css("a div").children.attr('alt').value,
      url: item.css("a").attr('href').value
    }
  end
end