class Gempendencies::GemInfo
Cool things to build off of…
forked repos:
for file in `find gem_info -iname "curl_result.json"`; do grep -H 'fork":' $file; done | grep true
archived repos:
for file in `find gem_info -iname "curl_result.json"`; do grep -H 'archived":' $file; done | grep true
most starred:
for file in `find gem_info -iname "curl_result.json"`; do grep -H 'watchers":' $file; done | sed 's/gem_info\/\(.*\).*\/.*ers": \([0-9]*\),/\2 \1/g' | sort -n -r | head -n 20
least starred:
file in `find gem_info -iname "curl_result.json"`; do grep -H 'watchers":' $file; done | sed 's/gem_info\/\(.*\).*\/.*ers": \([0-9]*\),/\2 \1/g' | sort -n | head -n 20
most issues:
for file in `find gem_info -iname "curl_result.json"`; do grep -H 'issues":' $file; done | sed 's/gem_info\/\(.*\).*\/.*sues": \([0-9]*\),/\2 \1/g' | sort -n -r | head -n 20
all svg files scraped and summarized:
find gem_info -name "*.svg" | sed 's/..*\/.*\/.*\/\(.*\)/\1/g' | sort | uniq -c | sort -n
Public Class Methods
new()
click to toggle source
# File lib/gempendencies/gem_info.rb, line 36 def initialize @domain_badge_labels = {} # # note - be sure to double backslash all the CLI backslashes # if File.exist?(".gempendencies/gem_info.txt") # github_urls # else # build_gem_info_txt # end end
Public Instance Methods
aggregate()
click to toggle source
# File lib/gempendencies/gem_info.rb, line 162 def aggregate data = {} total_counts = Hash.new(0) puts "Searching recursively for nested .gempendencies to aggregate" paths = `find . -iname "license_info.yaml"` paths.split("\n").each do |path| "./business_rules/.gempendencies/license_info.yaml" puts path project_name = path.match(/.\/(.*)\/\.gem.*/)[1] licenses = YAML.load_file(path) data[project_name] = licenses licenses.each do |name, count| total_counts[name] += count end end data['TOTALS'] = total_counts # order output columns by most common license puts "-" * 60 puts " Start CSV output" puts "-" * 60 columns = Hash[total_counts.sort{|a,b| b[1] <=> a[1]}].keys puts (['project'] + columns).join(',') data.each do |project_name, counts| print project_name columns.each do |col| print ',' print counts[col] end puts end puts "-" * 60 puts " End CSV output" puts "-" * 60 end
build(load_github_metadata = false)
click to toggle source
# File lib/gempendencies/gem_info.rb, line 197 def build(load_github_metadata = false) if !File.exist?(".gempendencies/gem_info.yaml") build_gem_info_txt end if load_github_metadata @github_urls.each do |url| cleansed = url.gsub(/http[s]*:../, '').gsub('github.com','').gsub('github.io','').gsub("/", " ").gsub(".", "").strip owner, repo = cleansed.split(" ") if owner && repo url = "https://api.github.com/repos/#{owner}/#{repo}" # puts "#{owner} - #{repo} : #{url}" directory = ".gempendencies/#{owner}/#{repo}" `mkdir -p #{directory}` file = "#{directory}/curl_result.json" if File.exist?(file) if (contents = File.read(file)).index("Moved Permanently") json = JSON.parse(contents) url = json['url'] elsif !contents.index("rate limit exceeded") puts "skipping #{file}..." json = JSON.parse(contents) get_badges(json['html_url'], directory) next end end contents = get_repo_json(url, file) json = JSON.parse(contents) get_badges(json['html_url'], directory) end end end end
build_gem_info_txt()
click to toggle source
uses bundler to build the gem_info.txt summarization of all gems used…
# File lib/gempendencies/gem_info.rb, line 47 def build_gem_info_txt # https://gist.github.com/deevis/3211023e2b14e85df6ca908dbc642a2d # https://gist.githubusercontent.com/deevis/3211023e2b14e85df6ca908dbc642a2d/raw/9fe1c9dfedc15b328cbe2e3f64f8198d56bb9795/generate_gem_info.sh `mkdir -p .gempendencies` gem_names = `bundle list`.split("\n").select{|s| s.index("*") && s.index("(")}.map{|s| s.split("*").last.split("(").first.gsub(" ","")} count = gem_names.length puts "Fetching 'gem info' for #{count} dependencies..." gem_info = {} license_counts = Hash.new(0) author_counts = Hash.new(0) gem_names.each_with_index do |gem_name,i| cmd = "gem info #{gem_name}" puts "\n#{i+1}/#{count} #{cmd}" info = `#{cmd}` data = {} info.split("\n").each do |line| next unless line.index(":") next if line.index("Installed at") key, value = line.split(": ") key = key.gsub('"', "").gsub(" ", "") case key when "Author", "Authors" key = "Author" value = value.split(", ") value.each{|v| author_counts[v] += 1} when "License", "Licenses" key = "License" value = value.split(", ") value.each{|v| license_counts[v] += 1} end data[key] = value end gem_info[gem_name] = data end File.open(".gempendencies/gem_info.yaml", "w") do |f| f.puts gem_info.to_yaml end author_counts = Hash[author_counts.sort{|a,b| b[1] <=> a[1]}] File.open(".gempendencies/author_info.yaml", "w"){|f| f.puts author_counts.to_yaml} license_counts = Hash[license_counts.sort{|a,b| b[1] <=> a[1]}] File.open(".gempendencies/license_info.yaml", "w"){|f| f.puts license_counts.to_yaml} end
get_badges(url, directory)
click to toggle source
# File lib/gempendencies/gem_info.rb, line 97 def get_badges(url, directory) `rm #{directory}/unknown*.svg` # `rm #{directory}/*.svg` begin doc = Nokogiri::HTML(URI.open(url)) rescue => e puts "Error[#{url}] : #{e.message}" return end articles = doc.css("#readme article") images = articles.css("a img") images.each do |i| canonical_source = i['data-canonical-src'] image_url = i['src'] label = i["alt"]&.gsub(' ','_')&.downcase if canonical_source.nil? || canonical_source.index("yard-docs") puts "Skipping: #{canonical_source || label}" next end extension = canonical_source.split("?").first.scan(/\....$/).first || '.svg' puts " canonical_source: #{canonical_source}" puts " image_url: #{image_url}" puts " label: #{label}" domain = canonical_source.scan(/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n?]+)/).flatten[0] if label (@domain_badge_labels[domain] ||= Set.new) << label else labels = @domain_badge_labels[domain] if labels.nil? puts " ERROR - no known labels for #{domain}" label = "unknown_#{SecureRandom.hex(3)}" elsif labels.length > 1 puts " ERROR - multiple possible labels for #{domain} - #{labels}" label = "unknown_#{SecureRandom.hex(3)}" else label = labels.to_a[0] puts " derived label: #{label}" end end image_name = "#{label}#{extension}" next if File.exist?("#{directory}/#{image_name}") # curl: -L follow redirects cmd = "curl -L #{canonical_source} -o #{directory}/#{image_name}" puts cmd `#{cmd}` end end
get_repo_json(url, file)
click to toggle source
# File lib/gempendencies/gem_info.rb, line 146 def get_repo_json(url, file) cmd = "curl #{url} > #{file}" puts cmd `#{cmd}` if (contents = File.read(file)).index("rate limit exceeded") puts "Exceeded rate limit (#{url})" raise contents elsif contents.index("Moved Permanently") url = JSON.parse(contents)['url'] puts "Moved Permanently to: #{url}" contents = get_repo_json(url, file) end sleep 0.5 contents end
github_urls()
click to toggle source
# File lib/gempendencies/gem_info.rb, line 90 def github_urls @github_urls = `grep "Homepage:" .gempendencies/gem_info.yaml | grep "github" | sed 's/.*page: \\(.*\\)/\\1/g'`.split("\n").uniq #puts @github_urls puts "Got #{@github_urls.length} github urls to process" @github_urls end