module Rudisco::RubyGemsScanner

Public Instance Methods

corteges_scanning(corteges) click to toggle source

Updates cortege(s).

Use this to fetch the latest information about the given gem(s), such as the total download count.

@param [Rudisco::Gem, Array<Rudisco::Gem>] corteges

# File lib/rudisco/models/gem/rubygems_scanner.rb, line 41
# Refreshes the given gem record(s) from rubygems.org.
#
# The argument may be a single record or a collection; it is normalized with
# +Array()+ and handed to +rubygems_manage_corteges+.
#
# @param [Rudisco::Gem, Array<Rudisco::Gem>] corteges
#
# @return whatever +rubygems_manage_corteges+ returns.
def corteges_scanning(corteges)
  gem_records = Array(corteges)
  rubygems_manage_corteges(gem_records)
end
deep_scanning(&callback) click to toggle source

Long-running task. Updates the database with current information. Multithreaded.

Note: speed is limited by rubygems.org.

Throughput can be increased with an advanced concurrent-process setup for
the SQLite database.

However, scanning remains slow because rubygems.org only allows a certain
number of requests per second.

@param [Proc<Integer>] callback

Optional. Receives the count of updated gems.
# File lib/rudisco/models/gem/rubygems_scanner.rb, line 29
# Public entry point for the full, long-running refresh.
#
# Forwards the optional block unchanged to +_deep_scanning+.
#
# @param [Proc<Integer>] callback
#   Optional block; it is passed through to +_deep_scanning+ as-is.
#
# @return whatever +_deep_scanning+ returns.
def deep_scanning(&callback)
  _deep_scanning(&callback)
end
surface_scanning() click to toggle source

Scans rubygems.org for new gems. Also marks gems as outdated when a new gem version is out.

To load current information for the newest gems, use deep_scanning.

# File lib/rudisco/models/gem/rubygems_scanner.rb, line 11
# Public entry point for the quick scan.
#
# Delegates to +_surface_scanning+, which adds gems that are not stored yet and
# flags stored gems with +need_update+ when their listed version has changed.
#
# @return whatever +_surface_scanning+ returns.
def surface_scanning
  _surface_scanning
end

Private Instance Methods

_deep_scanning(&callback) click to toggle source
# File lib/rudisco/models/gem/rubygems_scanner.rb, line 86
# Refreshes every record flagged with +need_update+, using worker threads.
#
# Runs +_surface_scanning+ first. The flagged records are then split into
# id-ordered windows (35 windows when more than 105 records are pending,
# otherwise a single one). Each window is processed in its own thread, 20
# records per database transaction.
#
# @param [Proc<Integer>] callback
#   Optional. Called after every processed slice with the slice size. The call
#   happens inside the worker threads, so the block may run concurrently.
#
# @return [Array<Thread>] the worker threads, already joined.
def _deep_scanning(&callback) # no-doc
  _surface_scanning # todo this call should be optional

  gem_update_count   = where(need_update: true).count
  threads_count      = gem_update_count > 105 ? 35 : 1
  bunch_sub_arr_size = gem_update_count / threads_count + 1

  # Load every window into memory *before* any worker starts. The windows are
  # LIMIT/OFFSET slices of "need_update = true", and workers clear that flag
  # while they run. A query executed lazily inside a thread would therefore
  # see shifted offsets and silently skip records.
  bunch =
    Array.new(threads_count) do |i|
      where(need_update: true)
        .order(:id)
        .limit(bunch_sub_arr_size).offset(i * bunch_sub_arr_size)
        .all
    end # bunch = [ [], [], [], [], ... [] ]

  threads =
    bunch.map do |sub_array|
      Thread.new(sub_array, callback) do |window, callback_proc|
        window.each_slice(20) do |gems|
          db.transaction { rubygems_manage_corteges gems }
          callback_proc.call(gems.count) unless callback_proc.nil?
        end
      end
    end

  threads.each(&:join)
end
_surface_scanning() click to toggle source
# File lib/rudisco/models/gem/rubygems_scanner.rb, line 47
# Synchronizes the local gem table with the output of `gem list --remote`.
#
# Gems that are not stored yet are inserted with their name and version.
# Stored gems whose listed version is newer than the stored one receive the
# new version and are flagged with +need_update+.
def _surface_scanning # no-doc
  tmp_file = Tempfile.new 'gems.tmp'
  rubygems =
    begin
      system "gem list --remote > #{tmp_file.path}"

      # Lines are expected to look like "name (1.2.3 ruby java)". Dropping the
      # punctuation and splitting on whitespace gives ["name", "1.2.3", ...].
      # Blank or incomplete lines have no name/version pair and are skipped
      # instead of being inserted as empty records.
      File.foreach(tmp_file.path)
          .map { |line| line.delete('(),').split }
          .select { |row| row.size >= 2 }
    ensure
      tmp_file.close! # close the handle and delete the file, even on error
    end
  sqlite_gems = select_hash :name, :version

  new_gems, known_gems = rubygems.partition { |name, _| sqlite_gems[name].nil? }

  ## ADD NEW GEMS TO DATABASE
  db.transaction do
    new_gems.each do |name, version|
      insert name: name, version: version
    end
  end

  ## UPDATE RECORDS IF NEED
  db.transaction do
    update_gems = known_gems.select { |name, version| sqlite_gems[name] != version }

    update_gems.each do |name, version|
      cortege = where(name: name).first
      stored  = cortege[:version].to_s

      # Compare as versions, not as strings: "9.0.0" > "10.0.0" is true
      # lexicographically, which would make real upgrades look like
      # downgrades. Malformed versions fall back to plain string comparison.
      downgraded =
        if ::Gem::Version.correct?(stored) && ::Gem::Version.correct?(version)
          ::Gem::Version.new(stored) > ::Gem::Version.new(version)
        else
          stored > version
        end
      next if downgraded # skip downgraded gems

      cortege[:version] = version
      cortege[:need_update] = true
      cortege.save
    end
  end
end
rubygems_manage_corteges(corteges) click to toggle source

@param [Array<Rudisco::Gem>] corteges

# File lib/rudisco/models/gem/rubygems_scanner.rb, line 178
# Brings each given record in line with what rubygems.org reports for it.
#
# For every record the response of +send_request_to_rubygems+ decides:
# * no response (+nil+): the record is left untouched;
# * a body containing "could not be found": the record is destroyed;
# * anything else: the body is parsed as JSON, applied through
#   +update_cortege+, and the record is saved.
#
# @param [Array<Rudisco::Gem>] corteges
#
# @return the collection that was passed in.
def rubygems_manage_corteges(corteges)
  corteges.each do |gem_record|
    body = send_request_to_rubygems(gem_record[:name])
    next if body.nil?

    if body =~ /could not be found/
      gem_record.destroy # gem was deleted from rubygems.org
      next
    end

    update_cortege(gem_record, JSON.parse(body))
    gem_record.save
  end
end
send_request_to_rubygems(name) click to toggle source

@param [String] name

Gem name.

@return [Nil, String]

Returns +nil+ when rubygems.org did not respond; otherwise
returns the JSON-encoded response body as a string.
# File lib/rudisco/models/gem/rubygems_scanner.rb, line 124
# Fetches the rubygems.org API document for a gem, retrying on failure.
#
# Makes up to 24 attempts. Before each one it sleeps for
# <tt>attempt * 0.35</tt> seconds, so the delay grows with every retry.
# Connection resets and open timeouts count as failed attempts.
#
# A "404 Not Found" answer is final, so its body is returned right away
# instead of being retried. Callers can then recognise a gem that no longer
# exists by inspecting the returned text.
#
# @param [String] name
#   Gem name.
#
# @return [Nil, String]
#   The response body for a successful (2xx) or "not found" (404) answer,
#   +nil+ when no attempt produced one.
def send_request_to_rubygems(name)
  url = URI.parse "https://rubygems.org/api/v1/gems/#{name}.json"

  (1...25).each do |try_count|
    begin
      sleep try_count * 0.35
      response = Net::HTTP.get_response(url)

      if response.is_a?(Net::HTTPSuccess) || response.is_a?(Net::HTTPNotFound)
        return response.body
      end
    rescue Errno::ECONNRESET, Net::OpenTimeout
      next
    end
  end

  nil
end
update_cortege(cortege, hsh) click to toggle source

Updates cortege with the data from hsh.

@param [Rudisco::Gem] cortege

@param [Hash] hsh

# File lib/rudisco/models/gem/rubygems_scanner.rb, line 147
# Copies the fields of a rubygems.org API document into a gem record.
#
# Only assigns attributes; it does not save the record. After the call the
# record's +need_update+ flag is +false+.
#
# @param [Rudisco::Gem] cortege
#   Record to fill; accessed through <tt>[]</tt> and <tt>[]=</tt> only.
#
# @param [Hash] hsh
#   Parsed API document with string keys ("info", "authors", "licenses", ...).
#   Missing keys yield empty strings (or 0 for the download counters).
#
# @return [false] the value assigned to +need_update+.
def update_cortege(cortege, hsh)
  cortege[:description] = hsh["info"].to_s
  cortege[:authors]     = hsh["authors"].to_s
  # NOTE(review): several licenses are concatenated without a separator.
  cortege[:license]     = Array(hsh["licenses"]).join
  cortege[:sha]         = hsh["sha"].to_s

  # "$ gem list" and rubygems.org/api can disagree about the latest version of
  # a gem. To avoid conflicts the stored version is never downgraded here; in
  # other words, "$ gem list" has priority over rubygems.org/api.
  #
  # The comparison uses Gem::Version because plain string comparison ranks
  # "1.9.0" above "1.10.0". Malformed versions fall back to string comparison.
  remote_version = hsh["version"].to_s
  stored_version = cortege[:version].to_s
  remote_is_newer =
    if ::Gem::Version.correct?(remote_version) && ::Gem::Version.correct?(stored_version)
      ::Gem::Version.new(remote_version) > ::Gem::Version.new(stored_version)
    else
      remote_version > stored_version
    end
  cortege[:version] = remote_version if remote_is_newer

  cortege[:source_code_url]   = hsh["source_code_uri"].to_s
  cortege[:project_url]       = hsh["project_uri"].to_s
  cortege[:gem_url]           = hsh["gem_uri"].to_s
  cortege[:wiki_url]          = hsh["wiki_uri"].to_s
  cortege[:documentation_url] = hsh["documentation_uri"].to_s
  cortege[:mailing_list_url]  = hsh["mailing_list_uri"].to_s
  cortege[:bug_tracker_url]   = hsh["bug_tracker_uri"].to_s

  cortege[:total_downloads]   = hsh["downloads"].to_i
  cortege[:version_downloads] = hsh["version_downloads"].to_i

  cortege[:need_update] = false
end