class StarWarsComics::Scraper

Public Class Methods

scrape_issue_info(issue)
# File lib/star-wars-comics/scraper.rb, line 34
# Fetches an issue's page and copies the details found in its infobox onto
# the issue.
#
# Sets +issue.name+ from the infobox title, then walks the infobox rows and
# assigns writer, penciller, letterer, colorist, publication date and page
# count for each row whose label matches. Rows with other labels are ignored.
#
# @param issue [Object] an issue exposing +path+ plus the setters used below
#   (presumably a StarWarsComics::Issue — confirm against callers)
# @return [Object] the collection of infobox rows that was iterated; callers
#   presumably ignore it
def self.scrape_issue_info(issue)
  # URI.open is used instead of bare `open`: Kernel#open stopped accepting
  # URLs in Ruby 3.0. The block form closes the handle once parsing is done.
  infobox = URI.open(BASE_PATH + issue.path) do |io|
    Nokogiri::HTML(io).css("aside.portable-infobox")
  end

  issue.name = infobox.css("h2.pi-title").text

  rows = infobox.css("section.pi-item.pi-group div.pi-item")

  rows.each do |row|
    label = row.css("h3.pi-data-label").text
    # Strips a bracketed footnote marker such as "[1]" from the value.
    # NOTE(review): the pattern is greedy, so with several markers on one line
    # everything from the first "[" to the last "]" is removed — confirm that
    # this is intended before changing it, since artist lookups key on it.
    value = row.css("div.pi-data-value").text.sub(/\[.*\]/, "")

    # First link inside the value, if any; nil when the value is plain text
    # or the link carries no href.
    link_node = row.at_css("div.pi-data-value a")
    link = link_node && link_node["href"]

    case label
    when "Writer"
      issue.writer = StarWarsComics::Artists::Writer.find_or_create_by_name(value, link)
    when "Penciller"
      issue.penciller = StarWarsComics::Artists::Penciller.find_or_create_by_name(value, link)
    when "Letterer"
      issue.letterer = StarWarsComics::Artists::Letterer.find_or_create_by_name(value, link)
    when "Colorist"
      issue.colorist = StarWarsComics::Artists::Colorist.find_or_create_by_name(value, link)
    when "Publication date"
      issue.pub_date = value
    when "Pages"
      issue.pages = value
    end
  end
end
scrape_issues(series)
# File lib/star-wars-comics/scraper.rb, line 21
# Fetches a series page and registers every issue linked from it, chaining
# the issues together in page order.
#
# Each issue is looked up (or created) by the link's title and href, attached
# to +series+, and linked to its neighbours through +last_issue+ /
# +next_issue+. The first issue's +last_issue+ is nil.
#
# @param series [Object] a series exposing +path+ (presumably a
#   StarWarsComics::Series — confirm against callers)
# @return [Object] the collection of issue links that was iterated; callers
#   presumably ignore it
def self.scrape_issues(series)
  # URI.open is used instead of bare `open`: Kernel#open stopped accepting
  # URLs in Ruby 3.0. The block form closes the handle once parsing is done.
  issue_links = URI.open(BASE_PATH + series.path) do |io|
    Nokogiri::HTML(io).css('td[style*="background-color:"] + td i a')
  end

  previous = nil
  issue_links.each do |issue_link|
    issue = StarWarsComics::Issue.find_or_create_by_name(issue_link["title"], issue_link["href"])
    issue.series = series
    issue.last_issue = previous
    # Back-link the previous issue to this one; nothing to link for the first.
    previous.next_issue = issue if previous
    previous = issue
  end
end
scrape_series(path, all_series)
# File lib/star-wars-comics/scraper.rb, line 3
# Recursively walks a category page and collects the series it leads to.
#
# Every sub-category link on the page is followed first (unless
# +dont_include+ rejects it). Then, if the page lists at least one member
# page, the first one is treated as the series: a StarWarsComics::Series is
# built from its link text and href, given a description taken from the first
# paragraph of the series page, and appended to +all_series+.
#
# @param path [String] path of the category page, appended to BASE_PATH
# @param all_series [Array] accumulator that found series are appended to
# @return [Array] +all_series+, the same object that was passed in
def self.scrape_series(path, all_series)
  # The category page is fetched and parsed once and reused for both queries
  # below. URI.open is used instead of bare `open` (Kernel#open stopped
  # accepting URLs in Ruby 3.0); the block form closes the handle.
  page = URI.open(BASE_PATH + path) { |io| Nokogiri::HTML(io) }

  page.css("div.CategoryTreeItem a").each do |category|
    # Implicit receiver so the call also works when dont_include is private.
    scrape_series(category["href"], all_series) unless dont_include(category)
  end

  series_link = page.css("div#mw-pages div a").first
  return all_series unless series_link

  series = StarWarsComics::Series.new(series_link.text, series_link["href"])

  series_page = URI.open(BASE_PATH + series.path) { |io| Nokogiri::HTML(io) }
  first_paragraph = series_page.at_css("div#mw-content-text p")
  if first_paragraph
    # Remove every bracketed footnote marker ("[1]", "[2]", ...) on its own;
    # a single greedy match would also delete the prose between two markers.
    series.desc = first_paragraph.text.gsub(/\[.*?\]/, "")
  end

  all_series << series
end

Private Class Methods

dont_include(category)
# File lib/star-wars-comics/scraper.rb, line 74
# Tells whether a category link should be skipped while crawling.
#
# @param category [#text] category link whose visible text is checked
# @return [Boolean] true when the text exactly equals one of the excluded
#   category titles, false otherwise
def self.dont_include(category)
  excluded_titles = [
    "Canon comic strips",
    "Star Wars Rebels Magazine comics",
    "Star Wars Adventures artists",
    "Star Wars Adventures stories",
    "Star Wars Adventures writers"
  ]

  excluded_titles.include?(category.text)
end