class GameListScraper
Core scraping class
Public Class Methods
new(*_args)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 5
#
# Prepares a scraper: creates the PageRetriever used for every fetch, starts
# with an empty game list, and then asks init_last_page_num to record the
# number of the last search page. Any arguments are accepted and ignored.
def initialize(*_args)
  @page_retriever = PageRetriever.new
  @game_list = []
  # Must run after @page_retriever is set: it fetches the first search page.
  init_last_page_num
end
Public Instance Methods
get_page_contents(url)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 24
#
# Fetches the page at +url+ by delegating to the page retriever's #retrieve
# and returns whatever that call returns.
def get_page_contents(url)
  retriever = @page_retriever
  retriever.retrieve(url)
end
get_review_contents(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 86
#
# Returns the value of the +data-store-tooltip+ attribute found on the
# entry's review-summary span (a span whose class contains
# 'search_review_summary').
#
# Returns nil when the entry has no such span, and also when the span is
# present but carries no +data-store-tooltip+ attribute.
def get_review_contents(entry)
  summary = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
  return nil if summary.empty?

  # The attribute lookup yields nil when the attribute is absent; guard it so
  # a span without a tooltip is treated as "no review data" instead of raising.
  tooltip = summary.attribute('data-store-tooltip')
  tooltip.value if tooltip
end
init_last_page_num()
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 11
#
# Fetches the first search page and stores the number of the last page in
# @last_page_num. The number is the last run of digits in the text of the
# element whose class contains 'search_pagination_right'; when that text
# holds no digits the stored value is 0.
#
# Any StandardError raised while fetching or parsing is reported on standard
# error and then re-raised unchanged.
def init_last_page_num
  first_page = get_page_contents("#{site}1")
  pagination_text = first_page.xpath("//div[contains(@class, 'search_pagination_right')]").text
  # nil.to_i is 0, so an empty scan result yields 0 rather than raising.
  @last_page_num = pagination_text.scan(/\d+/).last.to_i
rescue StandardError
  # Diagnostics belong on stderr so they cannot mix with data a caller
  # writes to stdout.
  $stderr.puts 'Could not connect to Steam Store'
  raise
end
scrape(first_page = 1, last_page = nil)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 35
#
# Scrapes the search pages from +first_page+ through +last_page+ (inclusive)
# and appends every scraped game to @game_list.
#
# first_page - number of the first page to scrape (default 1).
# last_page  - number of the last page to scrape; defaults to @last_page_num.
#
# Pages are handed to Parallel.map with +in_processes: 8+ and a progress
# title of the form "Scraping Steam Store pages 1 to 5".
#
# Returns @game_list, including games collected by earlier calls.
def scrape(first_page = 1, last_page = nil)
  last_page ||= @last_page_num
  # Interpolate both bounds: joining an array that holds a single Range
  # renders it as "1..5", so the ' to ' separator would never appear.
  title = "Scraping Steam Store pages #{first_page} to #{last_page}"
  games_per_page = Parallel.map(first_page..last_page, progress: title, in_processes: 8) do |page|
    scrape_page(search_results(page))
  end
  @game_list.concat(games_per_page.flatten)
end
scrape_entry(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 107
#
# Builds the hash describing one search-result entry. Each value comes from
# the scrape_* helper of the same name, called in the order the keys appear:
# :url, :name, :price, :release_date, :platforms, :icon_url, :review_score
# and :number_of_reviews.
def scrape_entry(entry)
  {
    url: scrape_url(entry),
    name: scrape_name(entry),
    price: scrape_price(entry),
    release_date: scrape_release_date(entry),
    platforms: scrape_platforms(entry),
    icon_url: scrape_icon_url(entry),
    review_score: scrape_review_score(entry),
    number_of_reviews: scrape_number_of_reviews(entry)
  }
end
scrape_icon_url(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 82
#
# Returns the +src+ attribute value of the image inside the entry's capsule
# div (a div whose class contains 'search_capsule').
#
# Returns nil when the entry has no such image or the image has no +src+,
# matching how the review helpers report missing data, so that one
# incomplete entry does not abort the scrape.
def scrape_icon_url(entry)
  images = entry.xpath(".//div[contains(@class, 'search_capsule')]/img")
  return nil if images.empty?

  src = images.attribute('src')
  src.value if src
end
scrape_name(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 58
#
# Returns the text of the entry's span whose class is exactly 'title'.
def scrape_name(entry)
  title_nodes = entry.xpath(".//span[@class='title']")
  title_nodes.text
end
scrape_number_of_reviews(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 100
#
# Extracts the review count from the entry's review tooltip, i.e. the digits
# and commas that follow "<percent>% of the " and precede " user".
#
# Returns the count as it appears in the tooltip (a String such as "1,234"),
# or nil when the entry has no tooltip or the tooltip has no such phrase.
def scrape_number_of_reviews(entry)
  review_string = get_review_contents(entry)
  return nil if review_string.nil?

  # \d+ rather than two fixed digits, so single-digit percentages such as
  # "5% of the 40 user reviews" are recognised too.
  matches = /\d+% of the ([0-9,]+) user/i.match(review_string)
  matches[1] if matches
end
scrape_page(current_page)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 46
#
# Converts every entry of +current_page+ with scrape_entry and returns the
# results as an Array, in page order.
#
# current_page - the collection of entries for one page, or nil.
#                search_results returns nil when a page could not be
#                fetched; such a page contributes an empty Array instead of
#                raising.
def scrape_page(current_page)
  return [] if current_page.nil?

  current_page.map { |entry| scrape_entry(entry) }
end
scrape_platforms(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 73
#
# Returns the platform labels for an entry, in the fixed order
# 'Windows', 'macOS', 'Linux', 'Steamplay'. A label is included when the
# entry contains a span whose class contains the matching marker
# ('win', 'mac', 'linux', 'steamplay').
def scrape_platforms(entry)
  checks = [
    ['Windows', ".//span[contains(@class, 'win')]"],
    ['macOS', ".//span[contains(@class, 'mac')]"],
    ['Linux', ".//span[contains(@class, 'linux')]"],
    ['Steamplay', ".//span[contains(@class, 'steamplay')]"]
  ]
  supported = checks.reject { |_label, query| entry.xpath(query).empty? }
  supported.map(&:first)
end
scrape_price(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 62
#
# Returns the entry's price text: the text of the 'search_price' div
# (excluding any 'search_price_discount_combined' div), stripped of
# surrounding whitespace, keeping only what follows the last '$'.
# Text without a '$' is returned whole; empty text yields nil.
def scrape_price(entry)
  price_nodes = entry.xpath(".//div[contains(@class, 'search_price') and not(contains(@class, 'search_price_discount_combined'))]")
  segments = price_nodes.text.strip.split('$')
  segments.last
end
scrape_release_date(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 67
#
# Parses the text of the entry's 'search_released' div with Date.parse and
# returns the resulting Date. Returns nil when any StandardError is raised
# along the way, e.g. when the text is not a parseable date.
def scrape_release_date(entry)
  released_text = entry.xpath(".//div[contains(@class, 'search_released')]").text
  Date.parse(released_text)
rescue StandardError
  nil
end
scrape_review_score(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 93
#
# Extracts the review percentage from the entry's review tooltip: the digits
# immediately before the first '%'.
#
# Returns the percentage as a String of digits (e.g. "97", "100", "5"), or
# nil when the entry has no tooltip or the tooltip contains no percentage.
def scrape_review_score(entry)
  review_string = get_review_contents(entry)
  return nil if review_string.nil?

  # Capture the whole run of digits. A greedy ".*" followed by exactly two
  # digits turns "100%" into "00" and rejects single-digit scores.
  matches = /(\d+)%/.match(review_string)
  matches[1] if matches
end
scrape_url(entry)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 54
#
# Returns the value of the entry's own +href+ attribute.
def scrape_url(entry)
  href = entry.attribute('href')
  href.value
end
search_results(page_number)
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 28
#
# Fetches search page +page_number+ (the URL is #site followed by the page
# number) and returns the anchors found directly under the child divs of
# the element with id 'search_result_container'.
#
# Returns nil when any StandardError is raised while fetching or querying.
def search_results(page_number)
  page_url = site + page_number.to_s
  get_page_contents(page_url).xpath("//div[@id='search_result_container']/div/a")
rescue StandardError
  nil
end
site()
click to toggle source
# File lib/steam_scraper/game_list_scraper.rb, line 20
#
# Returns the search URL prefix; callers append a page number to it.
def site
  search_url = 'http://store.steampowered.com/search?page='
  search_url
end