class NPRBestBooks::Scraper

require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'json'

Public Class Methods

books() click to toggle source
# File lib/npr_best_books/scraper.rb, line 8
# Downloads the archived NPR "Best Books 2015" page and returns the book data
# embedded in one of its <script> tags.
#
# Prints a progress message, fetches the page, picks the first <script> whose
# text mentions "BOOKS", strips the surrounding JavaScript with parse_data and
# parses what is left as JSON.
#
# @return [Object] whatever JSON.parse yields for the embedded payload
#   (presumably an Array of Hashes, since parse_data rewrites the payload to
#   start with "[{" — confirm against the live page)
# @raise [RuntimeError] if no <script> containing "BOOKS" is found
# @raise [JSON::ParserError] if the extracted text is not valid JSON
def self.books
  puts "Loading books..."
  # URI.open rather than Kernel#open: since Ruby 3.0 open-uri no longer lets
  # Kernel#open fetch URLs, so a bare open(url) would look for a local file.
  page = Nokogiri::HTML(URI.open("http://web.archive.org/web/20160622040558/http://apps.npr.org/best-books-2015/"))
  books_script = page.css("script").find { |script| script.children.text.include?("BOOKS") }
  # Fail with a descriptive message instead of a NoMethodError on nil.
  raise "No <script> tag containing BOOKS was found on the page" unless books_script

  JSON.parse(parse_data(books_script.children.text))
end
lookup_amazon(url) click to toggle source
# File lib/npr_best_books/scraper.rb, line 24
# Fetches the page at +url+ and parses it as HTML.
#
# @param url [String] address of the page to download
# @return [Object] the document produced by Nokogiri::HTML for that page
def self.lookup_amazon(url)
  # URI.open rather than Kernel#open: since Ruby 3.0 open-uri no longer lets
  # Kernel#open fetch URLs, so a bare open(url) would look for a local file.
  Nokogiri::HTML(URI.open(url))
end
parse_data(data) click to toggle source
# File lib/npr_best_books/scraper.rb, line 17
# Strips the JavaScript wrapper around the embedded book list so that only the
# array literal (plus any leftover whitespace) remains.
#
# The substitutions are applied in order, each to the result of the previous:
# 1. the "window.BOOKS = " assignment prefix is reduced to the opening "[{"
# 2. the trailing semicolon after the closing "}]" is dropped
# 3. the "ANALYTICS.setupChartbeat();" call is removed
#
# @param data [String] text content of the <script> tag
# @return [String] a new string with the substitutions applied
def self.parse_data(data)
  substitutions = [
    ["\n    window.BOOKS = [{", "[{"],
    ["}];", "}]"],
    ["ANALYTICS.setupChartbeat();", ""]
  ]

  substitutions.reduce(data) do |text, (needle, replacement)|
    text.gsub(needle, replacement)
  end
end