class WhatsOnNetflix::Scraper

Public Class Methods

get_html(url) click to toggle source
# File lib/whats_on_netflix/scraper.rb, line 8
def self.get_html(url)
  Nokogiri::HTML(open(url))
end
scrape_imdb_info(name) click to toggle source
# File lib/whats_on_netflix/scraper.rb, line 26
def self.scrape_imdb_info(name)

  search_page = Nokogiri::HTML(open("http://www.imdb.com/find?s=tt&q=" + URI.escape(name)))
  
  movie_page = Nokogiri::HTML(open("http://www.imdb.com" + "#{search_page.css("td a").attribute("href").value}"))

  info = {}

  info[:plot] = movie_page.css("div.summary_text").text.strip
  info[:genre] = ""
  info[:stars] = ""
  info[:year] = ""

  # getting info[:genre] into a readable format

  movie_page.css('span[itemprop="genre"]').each do |genre|
      info[:genre].concat("| #{genre.text} |")
  end

  # getting info[:stars] into a readable format

  movie_page.css('span[itemprop="actors"]').each do |actor|
    info[:stars].concat("#{actor.text.strip} ")
  end

  # getting info[:year] - TV show and movie pages are formatted a little differently

  if movie_page.css('a[title="See more release dates"]').text.include?("TV Series")
    info[:year] = movie_page.css('a[title="See more release dates"]').text
  else
    info[:year] = movie_page.css('span#titleYear').text.strip.gsub("(", "").gsub(")", "")
  end

  info
end
scrape_title_list(url) click to toggle source
# File lib/whats_on_netflix/scraper.rb, line 12
def self.scrape_title_list(url)
  html = self.get_html(url)

  titles = []

  html.css("h4 + ul li").each do |title|
      titles << title.text
  end

  titles
end