class GamePageScraper

Class that scrapes a game's actual page

Public Class Methods

new(*_args) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 6
# Builds a scraper with its own PageRetriever, which get_page_contents
# later uses to fetch pages.
#
# Any positional arguments are accepted and ignored (the underscore
# prefix on +_args+ marks them as intentionally unused).
def initialize(*_args)
  @page_retriever = PageRetriever.new
end

Public Instance Methods

get_page_contents(url) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 39
# Fetches the page at +url+ by delegating to the PageRetriever created in
# the constructor, and returns whatever the retriever returns.
#
# NOTE(review): scrape_game treats a nil return as "page unavailable";
# presumably PageRetriever#retrieve returns nil on failure - confirm.
def get_page_contents(url)
  @page_retriever.retrieve(url)
end
scrape(games_hash) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 10
# Enriches every game in +games_hash+ with the data found on its own page.
#
# games_hash - Array of game hashes; each game's :url key is read to locate
#              its page.
#
# The per-game work is handed to Parallel.map (8 worker processes, with a
# progress bar). A game whose :url is nil is not scraped and is kept as is.
#
# The array is updated in place so that it holds exactly one record per
# input game, in the original order, and the same array is returned.
# Previously the worker results were appended to the original entries,
# which left every game in the list twice (unscraped and scraped) and
# added a nil for each game without a URL.
def scrape(games_hash)
  scraped_games = Parallel.map(games_hash,
                               progress: 'Scraping additional per game data',
                               in_processes: 8) do |game|
    url = game[:url]
    # Fall back to the untouched game instead of producing a nil entry.
    url.nil? ? game : scrape_game(game, url)
  end

  # Workers hand back their results rather than mutating the caller's
  # hashes, so swap the contents instead of appending to them.
  games_hash.replace(scraped_games)
end
scrape_developer(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 67
# Reads the developer from a game page.
#
# Narrows the page down to its "details_block" div(s), selects the links
# whose href contains "developer", and returns their text with the
# surrounding whitespace removed.
def scrape_developer(page_contents)
  page_contents
    .xpath("//div[@class='details_block']")
    .xpath(".//a[contains(@href, 'developer')]")
    .text
    .strip
end
scrape_game(game, url) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 20
# Fetches the page at +url+ and, if it could be retrieved, adds the scraped
# details to +game+.
#
# Returns +game+ untouched when get_page_contents yields nil; otherwise
# returns the result of scrape_game_with_valid_contents.
def scrape_game(game, url)
  contents = get_page_contents(url)
  return game if contents.nil?

  scrape_game_with_valid_contents(game, contents)
end
scrape_game_with_valid_contents(game, page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 27
# Fills +game+ with every per-page attribute scraped from +page_contents+.
#
# Each field listed below is stored under the key of the same name, using
# the value returned by the matching scrape_<field> method. Fields are
# processed in the listed order, and +game+ itself is mutated and returned.
def scrape_game_with_valid_contents(game, page_contents)
  fields = %i[metacritic tags genres developer publisher min_spec recommended_spec]
  fields.each do |field|
    # e.g. :tags is produced by scrape_tags(page_contents)
    game[field] = send(:"scrape_#{field}", page_contents)
  end

  game
end
scrape_genres(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 57
# Collects the genres listed on a game page.
#
# Within the "details_block" div(s), every link whose href contains
# "genre" contributes its whitespace-stripped text. Returns an Array of
# those strings in document order.
def scrape_genres(page_contents)
  genre_links = page_contents
                .xpath("//div[@class='details_block']")
                .xpath(".//a[contains(@href, 'genre')]")
  genre_links.map { |link| link.text.strip }
end
scrape_metacritic(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 43
# Reads the Metacritic score from a game page.
#
# Uses the first <span> directly under the element with id
# "game_area_metascore". Returns its text converted with to_i, or nil when
# the page has no such element.
def scrape_metacritic(page_contents)
  score_span = page_contents.xpath("//div[@id='game_area_metascore']/span").first
  return nil if score_span.nil?

  score_span.text.to_i
end
scrape_min_spec(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 77
# Extracts the minimum system requirements for the 'win' data-os section.
#
# Pages either use a "game_area_sys_req_leftCol" div or a single
# "game_area_sys_req_full" div; the second is only queried when the first
# yields nothing. The selected nodes are handed to scrape_spec, whose
# result is returned.
def scrape_min_spec(page_contents)
  left_column = "//div[@data-os='win']/div[@class='game_area_sys_req_leftCol']/ul/ul"
  full_width = "//div[@data-os='win']/div[@class='game_area_sys_req_full']/ul/ul"

  spec_nodes = page_contents.xpath(left_column)
  spec_nodes = page_contents.xpath(full_width) if spec_nodes.empty?
  scrape_spec(spec_nodes)
end
scrape_publisher(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 72
# Reads the publisher from a game page.
#
# Narrows the page down to its "details_block" div(s), selects the links
# whose href contains "publisher", and returns their text with the
# surrounding whitespace removed.
def scrape_publisher(page_contents)
  page_contents
    .xpath("//div[@class='details_block']")
    .xpath(".//a[contains(@href, 'publisher')]")
    .text
    .strip
end
scrape_spec(node) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 90
# Turns the text of a system-requirements node into a Hash.
#
# node - any object responding to #text; the text is expected to hold one
#        "Label: value" entry per line.
#
# Returns a Hash mapping each label (as a Symbol) to its value (String).
#
# Parsing rules:
# * Lines may be separated by "\r\n", "\r" or "\n", in any mixture.
# * Only the first colon separates label from value, so values that
#   themselves contain a colon (e.g. "Intel: i5") are kept whole.
# * A line without a value ("Notes:" or a bare label) maps to "" instead
#   of repeating the label as its own value.
# * Blank lines and lines with an empty label are skipped.
def scrape_spec(node)
  node.text.split(/\r\n?|\n/).each_with_object({}) do |entry, spec_hash|
    label, value = entry.split(':', 2)
    label = label.to_s.strip
    next if label.empty?

    spec_hash[label.to_sym] = value.to_s.strip
  end
end
scrape_tags(page_contents) click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 48
# Collects the user-defined tags shown on a game page.
#
# Every link directly under a div whose class contains "popular_tags"
# contributes its whitespace-stripped text. Returns an Array of those
# strings in document order.
def scrape_tags(page_contents)
  tag_links = page_contents.xpath("//div[contains(@class, 'popular_tags')]/a")
  tag_links.map { |link| link.text.strip }
end