class Repos::GithubData
Public Class Methods
new(repo_user, repo_name, github_token, github_password, github_account, user_agent)
click to toggle source
# File lib/repocrawler/crawler.rb, line 11
# Remembers the repository coordinates and the credentials, and derives the
# two base URLs used by the other methods from "<repo_user>/<repo_name>".
#
# repo_user       - stored as @repo_user; first path segment of both URLs
# repo_name       - stored as @repo_name; second path segment of both URLs
# github_token    - stored as @access_token
# github_password - stored as @github_password
# github_account  - stored as @github_account
# user_agent      - stored as @user_agent
def initialize(repo_user, repo_name, github_token, github_password, github_account, user_agent)
  repo_path = "#{repo_user}/#{repo_name}"

  @repo_user = repo_user
  @repo_name = repo_name
  @access_token = github_token
  @github_password = github_password
  @github_account = github_account
  @user_agent = user_agent

  # The raw-content URL is pinned to the "master" branch.
  @GITHUB_README_URL = "https://raw.githubusercontent.com/#{repo_path}/master"
  @GITHUB_API_BASE_URL = "https://api.github.com/repos/#{repo_path}"
end
Public Instance Methods
get_commits_history()
click to toggle source
get commits history
# File lib/repocrawler/crawler.rb, line 118
# Walks the commits endpoint page by page and collects, for every commit,
# the committer name and the committer date.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter, and a token in
# the URL ends up in logs.
#
# Returns an Array of { "committer" => ..., "created_at" => ... } hashes in
# the reverse of the order the API delivered them. The array holds whatever
# was collected before the first empty page or error payload, so it is empty
# when the very first request fails.
def get_commits_history
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  commits_info = []
  page = 1
  loop do
    commits_fetch = HTTParty.get("#{@GITHUB_API_BASE_URL}/commits?page=#{page}", headers: headers)
    # A page of commits is a list; a Hash is an API error object (not found,
    # rate limit, ...) and iterating it as if it were commits would raise.
    break if commits_fetch.is_a?(Hash) || commits_fetch.empty?

    commits_fetch.each do |commit|
      committer = commit['commit']['committer']
      commits_info << { "committer" => committer['name'], "created_at" => committer['date'] }
    end
    page += 1
  end
  commits_info.reverse!
end
get_contributors()
click to toggle source
Get the contributors
# File lib/repocrawler/crawler.rb, line 39
# Fetches the contributors list and reduces every entry to its login and
# contribution count.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# A single request is made, so only the entries contained in that one
# response are returned.
#
# Returns an Array of { 'name' => login, 'contributions' => count } hashes,
# or nil when the API answers with an error object instead of a list.
def get_contributors
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  contributors = HTTParty.get("#{@GITHUB_API_BASE_URL}/contributors", headers: headers)
  # Contributors arrive as a list; any Hash is an error payload ("Not Found",
  # rate limit, ...) that cannot be mapped.
  return nil if contributors.is_a?(Hash)

  contributors.map do |contributor|
    { 'name' => contributor['login'], 'contributions' => contributor['contributions'] }
  end
end
get_forks()
click to toggle source
Get the number of forks.
# File lib/repocrawler/crawler.rb, line 73
# Reads the repository metadata and returns its fork count.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns the value of 'forks_count' from the response, or nil when the
# repository is not found.
def get_forks
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  repos_meta = HTTParty.get(@GITHUB_API_BASE_URL, headers: headers)
  return nil if repos_meta.is_a?(Hash) && repos_meta['message'] == 'Not Found'

  repos_meta['forks_count']
end
get_issues()
click to toggle source
get current open issues
# File lib/repocrawler/crawler.rb, line 102
# Reads the repository metadata and returns its open-issue count.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns the value of 'open_issues_count' from the response, or nil when
# the repository is not found.
def get_issues
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  repos_meta = HTTParty.get(@GITHUB_API_BASE_URL, headers: headers)
  return nil if repos_meta.is_a?(Hash) && repos_meta['message'] == 'Not Found'

  repos_meta['open_issues_count']
end
get_issues_info()
click to toggle source
get information of the closed issues
# File lib/repocrawler/crawler.rb, line 164
# Walks the closed-issues listing page by page and records, for every entry,
# its number, its creation and closing timestamps, and how many calendar
# days lie between the two.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns an Array of hashes with the keys 'number', 'created_at',
# 'closed_at' and 'duration' (Integer days), in the reverse of the order the
# API delivered them. Collection stops at the first empty page or error
# payload, so the array is empty when the very first request fails.
def get_issues_info
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  closed_issues = []
  page = 1
  loop do
    issue_fetch = HTTParty.get("#{@GITHUB_API_BASE_URL}/issues?state=closed&page=#{page}", headers: headers)
    # A page of issues is a list; a Hash is an API error object (not found,
    # rate limit, ...) and must not be iterated as if it were issues.
    break if issue_fetch.is_a?(Hash) || issue_fetch.empty?

    issue_fetch.each do |issue|
      opened_at = issue['created_at']
      closed_at = issue['closed_at']
      closed_issues << {
        'number' => issue['number'],
        'created_at' => opened_at,
        'closed_at' => closed_at,
        # Date (not time) difference: whole calendar days between the two.
        'duration' => (Date.parse(closed_at) - Date.parse(opened_at)).to_i
      }
    end
    page += 1
  end
  closed_issues.reverse!
end
get_last_commits_days()
click to toggle source
Get the number of days since the last commit.
# File lib/repocrawler/crawler.rb, line 199
# Computes how many calendar days have passed since the author date of the
# first commit in the commits listing.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns an Integer number of days (Date.today minus the commit's author
# date), or nil when the API answers with an error object or the listing
# contains no commits.
def get_last_commits_days
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  commits_fetch = HTTParty.get("#{@GITHUB_API_BASE_URL}/commits", headers: headers)
  # Commits arrive as a list; any Hash is an error payload.
  return nil if commits_fetch.is_a?(Hash)

  newest = commits_fetch.first
  # An empty listing has no commit to measure from.
  return nil unless newest

  (Date.today - Date.parse(newest['commit']['author']['date'])).to_i
end
get_last_year_commit_activity()
click to toggle source
get the commit activity in last year
# File lib/repocrawler/crawler.rb, line 23
# Fetches the commit-activity statistics and drops every record whose
# 'total' is zero.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns the response with the zero-total records removed in place, or nil
# when the repository is not found.
def get_last_year_commit_activity
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  activity = HTTParty.get("#{@GITHUB_API_BASE_URL}/stats/commit_activity", headers: headers)
  return nil if activity.is_a?(Hash) && activity['message'] == 'Not Found'

  activity.delete_if { |record| record['total'] == 0 }
  activity
end
get_readme_raw_text()
click to toggle source
get readme raw text
# File lib/repocrawler/crawler.rb, line 250
# Fetches the README resource from the API and returns its payload together
# with the encoding the API reports for it.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns { 'content' => ..., 'encoding' => ... } copied from the response,
# or nil when the README is not found.
def get_readme_raw_text
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  readme = HTTParty.get("#{@GITHUB_API_BASE_URL}/readme", headers: headers)
  return nil if readme.is_a?(Hash) && readme['message'] == 'Not Found'

  { 'content' => readme['content'], 'encoding' => readme['encoding'] }
end
get_readme_word_count()
click to toggle source
Get the word frequencies of the README file, excluding stop words.
# File lib/repocrawler/crawler.rb, line 215
# Counts how often each word occurs in the repository's README.
#
# The top-level contents listing is searched for an entry whose name starts
# with "README" (the last such entry wins). That file is then downloaded
# from the raw-content URL, split on whitespace, and every token made up
# only of word characters is counted unless its lowercase form appears in
# public/stop_words.txt. Counting itself is case-sensitive.
#
# The token travels in the Authorization header of the API request (and only
# when one is set): GitHub no longer accepts the access_token query
# parameter.
#
# Returns an Array of [word, count] pairs ordered by descending count, or
# nil when the listing is an error object or contains no README entry.
def get_readme_word_count
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  github_contents = HTTParty.get("#{@GITHUB_API_BASE_URL}/contents", headers: headers)
  # The listing is a list of entries; any Hash is an error payload.
  return nil if github_contents.is_a?(Hash)

  readme_file = nil
  github_contents.each do |content|
    readme_file = content['name'] if content['name'].to_s.start_with?('README')
  end
  # Without a README there is nothing to download; requesting "<base>/"
  # would only word-count an error page.
  return nil if readme_file.nil?

  stop_words_path = File.expand_path("../../public/stop_words.txt", File.dirname(__FILE__))
  # One stop word per line; chomp drops the line ending (LF or CRLF).
  stop_words = File.foreach(stop_words_path).map(&:chomp)

  readme = HTTParty.get("#{@GITHUB_README_URL}/#{readme_file}")
  freqs = Hash.new(0)
  readme.split(' ').each do |word|
    freqs[word] += 1 if word =~ /^\w+$/ && !stop_words.include?(word.downcase)
  end
  freqs.sort_by { |_word, freq| freq }.reverse!
end
get_stars()
click to toggle source
# File lib/repocrawler/crawler.rb, line 87
# Reads the repository metadata and returns its stargazer count.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns the value of 'stargazers_count' from the response, or nil when the
# repository is not found.
def get_stars
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  repos_meta = HTTParty.get(@GITHUB_API_BASE_URL, headers: headers)
  return nil if repos_meta.is_a?(Hash) && repos_meta['message'] == 'Not Found'

  repos_meta['stargazers_count']
end
get_test()
click to toggle source
Check whether the project has a test directory. TODO: search recursively.
# File lib/repocrawler/crawler.rb, line 269
# Reports whether the top level of the repository contains a directory whose
# name includes "spec" or "test". Only the top-level listing is inspected.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns 1 when such a directory exists, otherwise 0 (also when the
# repository is not found).
def get_test
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  contents = HTTParty.get("#{@GITHUB_API_BASE_URL}/contents", headers: headers)
  return 0 if contents.is_a?(Hash) && contents['message'] == 'Not Found'

  test_folders = contents.select do |content|
    # Substring match: names such as "specs" or "unit_tests" count as well.
    !(content['name'] =~ /(spec)|(test)/).nil? && content['type'] == 'dir'
  end
  test_folders.empty? ? 0 : 1
end
get_total_commits()
click to toggle source
get the total commits
# File lib/repocrawler/crawler.rb, line 58
# Adds up the 'contributions' value of every entry returned by
# get_contributors.
#
# Returns the sum, or nil when get_contributors returns nil.
def get_total_commits
  people = get_contributors
  return nil if people.nil?

  people.inject(0) { |total, person| total + person['contributions'] }
end
get_total_issues()
click to toggle source
get total number of issues
# File lib/repocrawler/crawler.rb, line 150
# Estimates the total number of issues from the issue listing (state=all):
# the 'number' of the first entry in the response is taken as the total.
# NOTE(review): this presumes the API lists the most recently created issue
# first - confirm against the endpoint's default sort order.
#
# The token travels in the Authorization header (and only when one is set):
# GitHub no longer accepts the access_token query parameter.
#
# Returns the first entry's 'number', or 0 when the listing is empty or the
# API answers with an error object.
def get_total_issues
  headers = { "User-Agent" => @user_agent }
  headers["Authorization"] = "token #{@access_token}" unless @access_token.to_s.empty?

  issues_fetch = HTTParty.get("#{@GITHUB_API_BASE_URL}/issues?state=all", headers: headers)
  # Issues arrive as a list; any Hash is an error payload ("Not Found",
  # rate limit, ...) whose first element is not an issue.
  return 0 if issues_fetch.is_a?(Hash)

  newest = issues_fetch.first
  newest ? newest['number'] : 0
end