class GamePageScraper
Class that scrapes a game's actual page.
Public Class Methods
new(*_args)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 6
# Builds a scraper with its own PageRetriever, stored in @page_retriever.
# Any positional arguments are accepted and ignored.
def initialize(*_args)
  @page_retriever = PageRetriever.new
end
Public Instance Methods
get_page_contents(url)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 39
# Fetches the page at +url+ by delegating to the retriever created in
# #initialize, and returns whatever the retriever returns.
# NOTE(review): #scrape_game treats a nil return as "no page available";
# confirm that PageRetriever#retrieve signals failures that way.
def get_page_contents(url)
  @page_retriever.retrieve(url)
end
scrape(games_hash)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 10
# Scrapes the store page of every game in +games_hash+ (a collection of
# hash-like game entries, despite the name) using 8 worker processes.
#
# Games without a :url are not scraped and contribute nil to the results.
# The results are appended to +games_hash+ itself, which is then flattened
# in place and returned.
#
# NOTE(review): because the results are pushed onto the input rather than
# replacing it, the returned collection holds the original entries followed
# by the scraped ones (and a nil per game without a URL). Confirm callers
# expect that before changing it.
def scrape(games_hash)
  scraped = Parallel.map(games_hash,
                         progress: 'Scraping additional per game data',
                         in_processes: 8) do |game|
    link = game[:url]
    next if link.nil?

    scrape_game(game, link)
  end

  games_hash.push(scraped)
  games_hash.flatten!
end
scrape_developer(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 67
# Returns the developer name(s) found on the page: the whitespace-stripped
# text of every link inside a "details_block" div whose href contains
# 'developer'.
# +page_contents+ must respond to #xpath (presumably a parsed Nokogiri
# document - confirm against PageRetriever).
def scrape_developer(page_contents)
  page_contents
    .xpath("//div[@class='details_block']")
    .xpath(".//a[contains(@href, 'developer')]")
    .text
    .strip
end
scrape_game(game, url)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 20
# Retrieves the page at +url+ and enriches +game+ with the data scraped
# from it. When no page contents are available (nil), +game+ is returned
# unchanged.
def scrape_game(game, url)
  contents = get_page_contents(url)
  return game if contents.nil?

  scrape_game_with_valid_contents(game, contents)
end
scrape_game_with_valid_contents(game, page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 27
# Fills +game+ in place with every per-page field and returns it.
#
# Each field listed below is produced by the matching scrape_<field> method
# (scrape_metacritic, scrape_tags, ...), called with +page_contents+; fields
# are assigned in the listed order.
def scrape_game_with_valid_contents(game, page_contents)
  %i[metacritic tags genres developer publisher min_spec recommended_spec].each do |field|
    game[field] = send(:"scrape_#{field}", page_contents)
  end
  game
end
scrape_genres(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 57
# Returns an array with the whitespace-stripped text of every link inside a
# "details_block" div whose href contains 'genre'. The array is empty when
# no such link exists.
# +page_contents+ must respond to #xpath (presumably a parsed Nokogiri
# document - confirm against PageRetriever).
def scrape_genres(page_contents)
  page_contents
    .xpath("//div[@class='details_block']")
    .xpath(".//a[contains(@href, 'genre')]")
    .map { |genre| genre.text.strip }
end
scrape_metacritic(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 43
# Returns the Metacritic score as an Integer, read from the first <span>
# directly under the div with id "game_area_metascore". Returns nil when the
# page has no such element.
def scrape_metacritic(page_contents)
  score_span = page_contents.xpath("//div[@id='game_area_metascore']/span").first
  return if score_span.nil?

  score_span.text.to_i
end
scrape_min_spec(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 77
# Returns the minimum Windows system requirements, parsed by #scrape_spec.
#
# The requirements are looked up in the left column of the Windows block
# first; when that lookup matches nothing, the full-width requirements
# block is used instead.
def scrape_min_spec(page_contents)
  left_column = page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_leftCol']/ul/ul")
  return scrape_spec(left_column) unless left_column.empty?

  scrape_spec(page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_full']/ul/ul"))
end
scrape_publisher(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 72
# Returns the publisher name(s) found on the page: the whitespace-stripped
# text of every link inside a "details_block" div whose href contains
# 'publisher'.
# +page_contents+ must respond to #xpath (presumably a parsed Nokogiri
# document - confirm against PageRetriever).
def scrape_publisher(page_contents)
  page_contents
    .xpath("//div[@class='details_block']")
    .xpath(".//a[contains(@href, 'publisher')]")
    .text
    .strip
end
scrape_recommended_spec(page_contents)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 85
# Returns the recommended Windows system requirements: the right column of
# the Windows requirements block, parsed by #scrape_spec.
def scrape_recommended_spec(page_contents)
  scrape_spec(
    page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_rightCol']/ul/ul")
  )
end
scrape_spec(node)
click to toggle source
# File lib/steam_scraper/game_page_scraper.rb, line 90
# Parses the text of a requirements node into a hash of
# { label (Symbol) => value (String) }.
#
# +node+ must respond to #text. The text is read one line at a time (any mix
# of "\r" and "\n" line endings); blank lines are ignored. Each line is
# split at its FIRST colon, so values that themselves contain colons
# (e.g. "Notes: Requires: 64-bit") are kept intact. Label and value are
# stripped of surrounding whitespace.
#
# A line without any colon maps its text to itself, so such free-form lines
# still appear in the result. Returns an empty hash for empty text.
def scrape_spec(node)
  node.text.split(/[\r\n]+/).each_with_object({}) do |entry, spec_hash|
    label, separator, detail = entry.partition(':')
    label = label.strip
    # Skip blank lines and lines that start with a colon: they have no label.
    next if label.empty?

    spec_hash[label.to_sym] = separator.empty? ? label : detail.strip
  end
end