class Repos::GithubData

Public Class Methods

new(repo_user, repo_name, github_token, github_password, github_account, user_agent) click to toggle source
# File lib/repocrawler/crawler.rb, line 11
# Stores the repository coordinates and credentials and derives the two base
# URLs the other methods build their requests from.
#
# repo_user       - owner segment of the repository path
# repo_name       - repository name segment of the repository path
# github_token    - kept as @access_token; appended to API request URLs
# github_password - kept as @github_password (not read by the methods shown here)
# github_account  - kept as @github_account (not read by the methods shown here)
# user_agent      - sent as the "User-Agent" header on API requests
def initialize(repo_user, repo_name, github_token, github_password, github_account, user_agent)
  repo_path = "#{repo_user}/#{repo_name}"

  # Raw file downloads are pinned to the master branch.
  @GITHUB_README_URL = "https://raw.githubusercontent.com/#{repo_path}/master"
  @GITHUB_API_BASE_URL = "https://api.github.com/repos/#{repo_path}"

  @access_token, @github_password, @github_account = github_token, github_password, github_account
  @user_agent = user_agent
  @repo_user, @repo_name = repo_user, repo_name
end

Public Instance Methods

get_commits_history() click to toggle source

get commits history

# File lib/repocrawler/crawler.rb, line 118
# Collects the committer name and commit date of every commit by requesting
# the /commits endpoint page by page until an empty page comes back.
#
# Paging also stops as soon as the API answers with a Hash: a page of commits
# is a list, so a Hash is an error payload ('Not Found' or any other API
# message). Entries gathered before that point are still returned.
#
# Returns an Array of { "committer" => name, "created_at" => date } Hashes, in
# the reverse of the order in which the API returned them.
def get_commits_history
  commits_info = []
  page = 1

  loop do
    commits_fetch = HTTParty.get(@GITHUB_API_BASE_URL + "/commits?page=#{page}&access_token=#{@access_token}", headers: {
      "User-Agent" => @user_agent
    })

    # Iterating an error Hash would yield [key, value] pairs and raise a
    # TypeError below, so every Hash ends the crawl, not only 'Not Found'.
    break if commits_fetch.is_a?(Hash)
    break if commits_fetch.count.zero?

    commits_fetch.each do |commit|
      committer = commit['commit']['committer']
      commits_info << {
        "committer"  => committer['name'],
        "created_at" => committer['date']
      }
    end

    page += 1
  end

  commits_info.reverse!
end
get_contributors() click to toggle source

Get the contributors

# File lib/repocrawler/crawler.rb, line 39
# Fetches the repository's contributors from the /contributors endpoint.
#
# Returns an Array of { 'name' => login, 'contributions' => count } Hashes, or
# nil when the API answers with a Hash. A contributor listing is a list, so a
# Hash is an error payload ('Not Found' or any other API message).
def get_contributors
  contributors = HTTParty.get(@GITHUB_API_BASE_URL + "/contributors?access_token=#{@access_token}", headers: {
    "User-Agent" => @user_agent
  })

  # A Hash cannot be mapped into contributor rows; treat every error payload
  # the same way as 'Not Found' instead of raising.
  return nil if contributors.is_a?(Hash)

  contributors.map do |contributor|
    {
      'name' => contributor['login'],
      'contributions' => contributor['contributions']
    }
  end
end
get_forks() click to toggle source

Get the number of forks.

# File lib/repocrawler/crawler.rb, line 73
# Reads the fork count from the repository metadata endpoint.
#
# Returns the 'forks_count' value of the response, or nil when the API
# answers with a 'Not Found' message.
def get_forks
  url = @GITHUB_API_BASE_URL + "?access_token=#{@access_token}"
  meta = HTTParty.get(url, headers: { "User-Agent" => @user_agent })

  return nil if meta.is_a?(Hash) && meta['message'] == 'Not Found'

  meta['forks_count']
end
get_issues() click to toggle source

Get the number of currently open issues.

# File lib/repocrawler/crawler.rb, line 102
# Reads the open-issue count from the repository metadata endpoint.
#
# Returns the 'open_issues_count' value of the response, or nil when the API
# answers with a 'Not Found' message.
def get_issues
  request_headers = { "User-Agent" => @user_agent }
  meta = HTTParty.get(@GITHUB_API_BASE_URL + "?access_token=#{@access_token}", headers: request_headers)

  missing = meta.is_a?(Hash) && meta['message'] == 'Not Found'
  missing ? nil : meta['open_issues_count']
end
get_issues_info() click to toggle source

get information of the closed issues

# File lib/repocrawler/crawler.rb, line 164
# Collects number, creation date, closing date and lifetime in days of every
# closed issue by requesting the /issues?state=closed endpoint page by page
# until an empty page comes back.
#
# Paging also stops as soon as the API answers with a Hash: a page of issues
# is a list, so a Hash is an error payload ('Not Found' or any other API
# message). Entries gathered before that point are still returned.
#
# Returns an Array of Hashes with the keys 'number', 'created_at', 'closed_at'
# and 'duration' (whole calendar days between the two dates), in the reverse
# of the order in which the API returned them.
def get_issues_info
  closed_issues = []
  page = 1

  loop do
    issue_fetch = HTTParty.get(@GITHUB_API_BASE_URL + "/issues?state=closed&page=#{page}&access_token=#{@access_token}", headers: {
      "User-Agent" => @user_agent
    })

    # Iterating an error Hash would yield [key, value] pairs and raise a
    # TypeError below, so every Hash ends the crawl, not only 'Not Found'.
    break if issue_fetch.is_a?(Hash)
    break if issue_fetch.count.zero?

    issue_fetch.each do |issue|
      created_at = issue['created_at']
      closed_at = issue['closed_at']

      closed_issues << {
        'number'     => issue['number'],
        'created_at' => created_at,
        'closed_at'  => closed_at,
        # Date (not time) arithmetic: the time of day is ignored.
        'duration'   => (Date.parse(closed_at) - Date.parse(created_at)).to_i
      }
    end

    page += 1
  end

  closed_issues.reverse!
end
get_last_commits_days() click to toggle source

Get the number of days since the last commit.

# File lib/repocrawler/crawler.rb, line 199
# Computes how many days ago the first commit returned by the /commits
# endpoint was authored.
#
# Returns the whole number of days between today and that commit's author
# date, or nil when no commit is available: the API answered with a Hash
# (an error payload such as 'Not Found') or with an empty list.
def get_last_commits_days
  commits_fetch = HTTParty.get(@GITHUB_API_BASE_URL + "/commits?access_token=#{@access_token}", headers: {
      "User-Agent" => @user_agent
  })

  # A commit listing is a list; any Hash is an error payload, and calling
  # first on it would hand back a [key, value] pair instead of a commit.
  return nil if commits_fetch.is_a?(Hash)

  newest = commits_fetch.first
  # An empty listing has no first commit to take a date from.
  return nil if newest.nil?

  (Date.today - Date.parse(newest['commit']['author']['date'])).to_i
end
get_last_year_commit_activity() click to toggle source

get the commit activity in last year

# File lib/repocrawler/crawler.rb, line 23
# Fetches the /stats/commit_activity records and drops every record whose
# 'total' is 0.
#
# Returns the filtered response, or nil when the API answers with a
# 'Not Found' message.
def get_last_year_commit_activity
  url = @GITHUB_API_BASE_URL + "/stats/commit_activity?access_token=#{@access_token}"
  activity = HTTParty.get(url, headers: { "User-Agent" => @user_agent })

  return nil if activity.is_a?(Hash) && activity['message'] == 'Not Found'

  # Filter in place; the response object itself is what gets returned.
  activity.delete_if { |week| week['total'] == 0 }
  activity
end
get_readme_raw_text() click to toggle source

Get the README content and its encoding as returned by the API.

# File lib/repocrawler/crawler.rb, line 250
# Fetches the /readme endpoint and extracts the README payload.
#
# Returns a Hash with the response's 'content' and 'encoding' values, or nil
# when the API answers with a 'Not Found' message.
def get_readme_raw_text
  url = @GITHUB_API_BASE_URL + "/readme?access_token=#{@access_token}"
  response = HTTParty.get(url, headers: { "User-Agent" => @user_agent })

  return nil if response.is_a?(Hash) && response['message'] == 'Not Found'

  { 'content' => response['content'], 'encoding' => response['encoding'] }
end
get_readme_word_count() click to toggle source

Get the word frequencies of the README file, excluding stop words, sorted from most to least frequent.

# File lib/repocrawler/crawler.rb, line 215
# Counts how often each word occurs in the repository's README.
#
# The top-level /contents listing is searched for an entry whose name starts
# with "README" (the last match wins). That file is then downloaded from
# @GITHUB_README_URL and split on whitespace. Only tokens made up entirely of
# word characters are counted, and tokens whose lowercase form appears in
# public/stop_words.txt are skipped. Counting itself is case-sensitive.
#
# Returns an Array of [word, count] pairs sorted from most to least frequent,
# or nil when there is nothing to count: the API answered with a Hash (an
# error payload such as 'Not Found') or the listing has no README entry.
def get_readme_word_count
  github_contents = HTTParty.get(@GITHUB_API_BASE_URL + "/contents?access_token=#{@access_token}", headers: {
    "User-Agent" => @user_agent
  })

  # A directory listing is a list; any Hash is an error payload and cannot be
  # scanned for file entries.
  return nil if github_contents.is_a?(Hash)

  readme_file = nil
  github_contents.each do |content|
    readme_file = content['name'] if content['name'] =~ /^README/
  end

  # Without a README entry the download URL would end in a bare "/" and the
  # words of whatever that request returns would be counted.
  return nil if readme_file.nil?

  stop_words = []
  File.open(File.expand_path("../../public/stop_words.txt",  File.dirname(__FILE__)), "r") do |f|
    f.each_line do |line|
      stop_words << line.gsub(/\n/,"")
    end
  end

  readme = HTTParty.get(@GITHUB_README_URL + "/#{readme_file}")

  freqs = Hash.new(0)
  readme.split(' ').each do |word|
    freqs[word] += 1 if word =~ /^\w+$/ && !stop_words.include?(word.downcase)
  end

  freqs.sort_by { |_word, freq| freq }.reverse!
end
get_stars() click to toggle source
# File lib/repocrawler/crawler.rb, line 87
# Reads the star count from the repository metadata endpoint.
#
# Returns the 'stargazers_count' value of the response, or nil when the API
# answers with a 'Not Found' message.
def get_stars
  meta = HTTParty.get(
    @GITHUB_API_BASE_URL + "?access_token=#{@access_token}",
    headers: { "User-Agent" => @user_agent }
  )

  not_found = meta.is_a?(Hash) && meta['message'] == 'Not Found'
  return nil if not_found

  meta['stargazers_count']
end
get_test() click to toggle source

Check whether the project has a top-level directory whose name contains "spec" or "test". TODO: search recursively.

# File lib/repocrawler/crawler.rb, line 269
# Reports whether the top-level /contents listing holds a directory whose
# name contains "spec" or "test" anywhere in it.
#
# Returns 1 when such a directory exists, otherwise 0 (also 0 when the API
# answers with a 'Not Found' message).
def get_test
  url = @GITHUB_API_BASE_URL + "/contents?access_token=#{@access_token}"
  contents = HTTParty.get(url, headers: { "User-Agent" => @user_agent })

  return 0 if contents.is_a?(Hash) && contents['message'] == 'Not Found'

  test_dirs = contents.select do |entry|
    name_hit = entry['name'] =~ /(spec)|(test)/
    !name_hit.nil? && entry['type'] == 'dir'
  end

  test_dirs.empty? ? 0 : 1
end
get_total_commits() click to toggle source

get the total commits

# File lib/repocrawler/crawler.rb, line 58
# Adds up the 'contributions' of every entry returned by get_contributors.
#
# Returns the sum, or nil when get_contributors returns nil.
def get_total_commits
  contributors = get_contributors
  return nil if contributors.nil?

  contributors.inject(0) { |total, contributor| total + contributor['contributions'] }
end
get_total_issues() click to toggle source

Get the total number of issues, taken from the number of the first issue the API returns (0 if there are none).

# File lib/repocrawler/crawler.rb, line 150
# Estimates the total number of issues from the /issues?state=all listing by
# taking the 'number' of the first issue returned.
# NOTE(review): this presumes the first entry carries the highest issue
# number — confirm against the API's default ordering.
#
# Returns that number, or 0 when the listing is empty or the API answers
# with a Hash (an error payload such as 'Not Found').
def get_total_issues
  issues_fetch = HTTParty.get(@GITHUB_API_BASE_URL + "/issues?state=all&access_token=#{@access_token}", headers: {
    "User-Agent" => @user_agent
  })

  # An issue listing is a list; calling first on an error Hash would hand
  # back a [key, value] pair and raise on ['number'].
  return 0 if issues_fetch.is_a?(Hash)
  return 0 if issues_fetch.count.zero?

  issues_fetch.first['number']
end