class NPRBestBooks::Scraper
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'json'
Public Class Methods
books()
click to toggle source
# File lib/npr_best_books/scraper.rb, line 8
#
# Downloads the archived NPR "Best Books 2015" page, locates the inline
# <script> whose text mentions "BOOKS", strips the JavaScript wrapper from it
# via parse_data and returns the result of JSON.parse on what remains.
#
# Prints "Loading books..." to stdout before fetching.
#
# Raises RuntimeError when no script on the page mentions "BOOKS"; errors from
# the HTTP fetch and from JSON.parse propagate unchanged.
def self.books
  puts "Loading books..."

  # URI.open is used instead of bare `open`: open-uri's Kernel#open patch was
  # deprecated in Ruby 2.7 and removed in 3.0.
  html = Nokogiri::HTML(URI.open("http://web.archive.org/web/20160622040558/http://apps.npr.org/best-books-2015/"))

  # Only the first matching script is used.
  script = html.css("script").find { |s| s.children.text.include?("BOOKS") }
  raise "No <script> containing BOOKS data was found on the page" unless script

  JSON.parse(parse_data(script.children.text))
end
lookup_amazon(url)
click to toggle source
# File lib/npr_best_books/scraper.rb, line 24
#
# Fetches +url+ and returns the response body parsed by Nokogiri::HTML.
#
# URI.open is used instead of bare `open` for two reasons: open-uri's
# Kernel#open patch was removed in Ruby 3.0, and Kernel#open treats an argument
# beginning with "|" as a command to execute, which is unsafe for a URL that
# comes from the caller.
#
# Errors raised while parsing or fetching the URL propagate to the caller.
def self.lookup_amazon(url)
  Nokogiri::HTML(URI.open(url))
end
parse_data(data)
click to toggle source
# File lib/npr_best_books/scraper.rb, line 17
#
# Strips the JavaScript wrapper from the text of the inline script so that
# only the array literal is left.
#
# Three textual substitutions are applied to +data+, each to every occurrence:
# * a newline, optional spaces/tabs, then "window.BOOKS = [{" becomes "[{"
# * "}];" becomes "}]"
# * "ANALYTICS.setupChartbeat();" is removed
#
# The indentation before "window.BOOKS" is matched loosely so the result does
# not depend on how the page happens to indent its inline script. Any other
# surrounding whitespace is left in place.
#
# Returns a new String; +data+ itself is not modified.
def self.parse_data(data)
  data
    .gsub(/\n[ \t]*window\.BOOKS = \[\{/, "[{")
    .gsub("}];", "}]")
    .gsub("ANALYTICS.setupChartbeat();", "")
end