class Github::Trending::Scraper
Constants
- BASE_HOST
- BASE_URL
Public Class Methods
new()
click to toggle source
# File lib/github_trending/scraper.rb, line 26 def initialize @agent = Mechanize.new @agent.user_agent = "github-trending #{VERSION}" proxy = URI.parse(ENV['http_proxy']) if ENV['http_proxy'] @agent.set_proxy(proxy.host, proxy.port, proxy.user, proxy.password) if proxy end
Public Instance Methods
get(language = nil, since = nil)
click to toggle source
# File lib/github_trending/scraper.rb, line 33 def get(language = nil, since = nil) projects = [] page = @agent.get(generate_url_for_get(language, since)) page.search('.repo-list-item').each do |content| project = Project.new meta_data = content.search('.repo-list-meta').text project.lang, project.star_count = extract_lang_and_star_from_meta(meta_data) project.name = content.search('.repo-list-name a').text.split.join project.url = BASE_HOST + content.search('.repo-list-name a').first.attributes["href"].value project.description = content.search('.repo-list-description').text.gsub("\n", '').strip projects << project end fail ScrapeException if projects.empty? projects end
list_languages()
click to toggle source
# File lib/github_trending/scraper.rb, line 50 def list_languages languages = [] page = @agent.get(BASE_URL) page.search('div.select-menu-item a').each do |content| href = content.attributes['href'].value # objective-c++ => language = href.match(/github.com\/trending\?l=(.+)/).to_a[1] languages << CGI.unescape(language) if language end languages end
Private Instance Methods
extract_lang_and_star_from_meta(text)
click to toggle source
# File lib/github_trending/scraper.rb, line 84 def extract_lang_and_star_from_meta(text) meta_data = text.split('•').map { |x| x.gsub("\n", '').strip } if meta_data.size == 3 lang = meta_data[0] star_count = meta_data[1].gsub(',', '').to_i [lang, star_count] else star_count = meta_data[0].gsub(',', '').to_i ['', star_count] end end
generate_url_for_get(language, since)
click to toggle source
# File lib/github_trending/scraper.rb, line 64 def generate_url_for_get(language, since) language = language.to_s.gsub('_', '-') if language if since since = case since.to_sym when :d, :day, :daily then 'daily' when :w, :week, :weekly then 'weekly' when :m, :month, :monthly then 'monthly' else nil end end uri = Addressable::URI.parse(BASE_URL) if language || since uri.query_values = { l: language, since: since }.delete_if { |_k, v| v.nil? } end uri.to_s end