class Scraper

Constants

BASE_URL

Public Class Methods

scrape_movie_by_id(id) click to toggle source
# File lib/imdb_term/scraper.rb, line 55
# Fetches the title page "#{BASE_URL}/title/tt<id>" and extracts the movie
# details found inside its 'div#title-overview-widget' element.
#
# id - the title identifier without the leading "tt" (the prefix is added
#      when the URL is built).
#
# Returns a Hash with the keys :id, :title, :release_year, :content_rating,
# :runtime, :summary and :director (Strings, apart from :id which is passed
# through untouched) plus :genres and :stars (Arrays of Strings).
# Elements missing from the page yield empty strings / empty arrays.
def self.scrape_movie_by_id(id)
  doc = Nokogiri::HTML(open("#{BASE_URL}/title/tt#{id}"))
  overview = doc.css('div#title-overview-widget')
  directors = overview.css('div.plot_summary_wrapper span[itemprop="director"] span')

  {
    # The heading's last 7 characters are cut off -- presumably a
    # " (YYYY)" year suffix; confirm against the live markup.
    :id => id,
    :title => overview.css('div.title_wrapper > h1').text.strip[(0..-8)],
    :release_year => overview.css('span#titleYear a').text.strip,
    # NOTE(review): this is the text of the whole subtext line, not only the
    # rating -- kept as-is because callers may rely on it.
    :content_rating => overview.css('div.subtext').text,
    :runtime => overview.css('div.subtext time').text.strip,
    :genres => overview.css('div.subtext span.itemprop').map(&:text),
    :summary => overview.css('div.plot_summary_wrapper div.summary_text').text.strip,
    # Joining covers zero, one and many directors alike, so no special
    # case for a single match is needed.
    :director => directors.map(&:text).join(', '),
    :stars => overview.css('div.plot_summary_wrapper span[itemprop="actors"] span').map(&:text)
  }
end
scrape_movie_by_title(title) click to toggle source
# File lib/imdb_term/scraper.rb, line 38
  # Runs a search against "#{BASE_URL}/find?q=<title>" and returns the entries
  # of the first 'div.findSection' block on the result page.
  #
  # title - the search text. It is interpolated into the query string as-is.
  #         NOTE(review): no URL escaping happens here, so callers presumably
  #         pass an already encoded value (e.g. "Iron+Man") -- confirm.
  #
  # Returns an Array of Hashes with the keys
  #   :id           - digits of the "tt..." segment of the result link
  #   :title        - text of the result link
  #   :release_year - last four-digit number starting with 1 or 2 in the
  #                   result text, or nil
  #   :type         - MatchData for the last parenthesised label when that
  #                   label starts with a letter (e.g. "(TV Series)"), else nil
  # An empty Array is returned when the page has no result section.
  def self.scrape_movie_by_title(title) # /find?q=Iron+Man
    doc = Nokogiri::HTML(open("#{BASE_URL}/find?q=#{title}"))
    section = doc.css('div.findSection').first
    # A search without hits has no findSection; report "no results" instead
    # of failing on nil.
    return [] unless section

    section.css('td.result_text').map do |movie|
      link = movie.css('a')
      # Only the last "(...)" group counts as the type; a group starting with
      # a digit or a space (such as the release year) means there is none.
      label = movie.text.scan(/\([\w ]+\)/).last
      label = nil unless label && label[1].match(/[A-Za-z]/)
      {
        :id => link.attribute('href').value.split('/')[2][(2..-1)],
        :title => link.text,
        :release_year => movie.text.scan(/[12]\d{3}/)[-1],
        :type => label && label.match(/[\w ]+/)
      }
    end
  end

  # Fetches the title page "#{BASE_URL}/title/tt<id>" and extracts the movie
  # details found inside its 'div#title-overview-widget' element.
  #
  # id - the title identifier without the leading "tt" (the prefix is added
  #      when the URL is built).
  #
  # Returns a Hash with the keys :id, :title, :release_year, :content_rating,
  # :runtime, :summary and :director (Strings, apart from :id which is passed
  # through untouched) plus :genres and :stars (Arrays of Strings).
  # Elements missing from the page yield empty strings / empty arrays.
  def self.scrape_movie_by_id(id)
    doc = Nokogiri::HTML(open("#{BASE_URL}/title/tt#{id}"))
    overview = doc.css('div#title-overview-widget')
    directors = overview.css('div.plot_summary_wrapper span[itemprop="director"] span')

    {
      # The heading's last 7 characters are cut off -- presumably a
      # " (YYYY)" year suffix; confirm against the live markup.
      :id => id,
      :title => overview.css('div.title_wrapper > h1').text.strip[(0..-8)],
      :release_year => overview.css('span#titleYear a').text.strip,
      # NOTE(review): this is the text of the whole subtext line, not only
      # the rating -- kept as-is because callers may rely on it.
      :content_rating => overview.css('div.subtext').text,
      :runtime => overview.css('div.subtext time').text.strip,
      :genres => overview.css('div.subtext span.itemprop').map(&:text),
      :summary => overview.css('div.plot_summary_wrapper div.summary_text').text.strip,
      # Joining covers zero, one and many directors alike, so no special
      # case for a single match is needed.
      :director => directors.map(&:text).join(', '),
      :stars => overview.css('div.plot_summary_wrapper span[itemprop="actors"] span').map(&:text)
    }
  end

end
scrape_now_playing() click to toggle source
# File lib/imdb_term/scraper.rb, line 8
# Reads "#{BASE_URL}/movies-in-theaters" and lists the movies linked from the
# LAST 'div.list.detail.sub-list' block inside 'div#main'.
#
# Returns an Array of Hashes, one per poster link:
#   :id    - third "/"-separated segment of the link's href with its first
#            two characters removed (presumably the "tt" prefix)
#   :title - the poster image's title attribute minus its last 6 characters
#            (presumably a "(YYYY)" suffix -- confirm), stripped
def self.scrape_now_playing
  page = Nokogiri::HTML(open("#{BASE_URL}/movies-in-theaters"))
  listings = page.css('div#main div.list.detail.sub-list')
  poster_links = listings.last.css('td#img_primary a')

  poster_links.map do |link|
    href = link.attribute('href').value
    caption = link.css('img').attribute('title').value
    {
      :id    => href.split('/')[2][(2..-1)],
      :title => caption[(0..-7)].strip
    }
  end
end
scrape_opening_this_week() click to toggle source
# File lib/imdb_term/scraper.rb, line 23
# Reads "#{BASE_URL}/movies-in-theaters" and lists the movies linked from the
# FIRST 'div.list.detail.sub-list' block inside 'div#main'.
#
# Returns an Array of Hashes, one per poster link:
#   :id    - third "/"-separated segment of the link's href with its first
#            two characters removed (presumably the "tt" prefix)
#   :title - the poster image's title attribute minus its last 6 characters
#            (presumably a "(YYYY)" suffix -- confirm), stripped
def self.scrape_opening_this_week
  page = Nokogiri::HTML(open("#{BASE_URL}/movies-in-theaters"))
  listings = page.css('div#main div.list.detail.sub-list')
  poster_links = listings.first.css('td#img_primary a')

  poster_links.map do |link|
    href = link.attribute('href').value
    caption = link.css('img').attribute('title').value
    {
      :id    => href.split('/')[2][(2..-1)],
      :title => caption[(0..-7)].strip
    }
  end
end