class OnlineGHAProvider
Public Class Methods
new(max_retries = 3, proactive = false, proactive_pool_size = 10)
click to toggle source
Calls superclass method
GHAProvider::new
# File lib/gh-archive.rb, line 254
# Builds a provider: runs the superclass initializer without arguments,
# records the retry limit, optionally switches proactive mode on, and
# creates a fresh Cache instance.
#
# @param max_retries [Integer] value forwarded to #max_retries
# @param proactive [Boolean] when truthy, #proactive is invoked
# @param proactive_pool_size [Integer] pool size forwarded to #proactive
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
  super()

  self.max_retries(max_retries)
  if proactive
    self.proactive(proactive_pool_size)
  end

  @cache = Cache.new
end
Public Instance Methods
cache(current_time)
click to toggle source
# File lib/gh-archive.rb, line 311
# Downloads the archive for +current_time+ and stores its content in @cache.
#
# Polls once per second while the cache reports it is full, then makes up
# to @max_retries download attempts. A connection reset or an HTTP error
# whose status starts with "5" is logged and retried; any other HTTP error
# is re-raised. When every attempt fails nothing is cached; a warning is
# logged so the failure is visible instead of silent.
#
# @param current_time [Object] passed to get_gha_filename and interpolated
#   into log messages (presumably a Time -- confirm against callers)
# @return [nil]
# @raise [OpenURI::HTTPError] for HTTP errors that are not 5xx
def cache(current_time)
  @logger.info("Full cache. Waiting for some free slot...") if @cache.full?
  sleep 1 while @cache.full?

  @max_retries.times do
    begin
      filename = get_gha_filename(current_time)
      URI.open("http://data.gharchive.org/#{filename}") do |gz|
        @cache.put(filename, read_gha_file(gz))
        return
      end
    rescue Errno::ECONNRESET => e
      @logger.warn("A server error temporary prevented the download of #{current_time}: #{e.message}")
    rescue OpenURI::HTTPError => e
      # Only server-side (5xx) failures are worth another attempt.
      raise unless e.io.status[0].start_with?("5")

      @logger.warn("A server error temporary prevented the download of #{current_time}: #{e.message}")
    end
  end

  # Reached only when no attempt succeeded: report it rather than
  # returning silently with nothing cached.
  @logger.warn("Exceeded maximum number of tentative downloads for #{current_time}.")
  nil
end
each(from = Time.gm(2015, 1, 1), to = Time.now)
click to toggle source
Calls superclass method
GHAProvider#each
# File lib/gh-archive.rb, line 339
# Iterates over the archives between +from+ and +to+ by delegating to the
# superclass implementation (arguments and block are forwarded unchanged).
#
# In proactive mode, before delegating, one download task per time slot
# yielded by each_time is handed to @pool; each task calls #cache and then
# delivers the +any_ready+ promise. The method waits on that promise so
# that iteration starts only once some task has finished caching.
#
# The wait is skipped when no task was scheduled (each_time yielded
# nothing, e.g. an empty range): nobody would ever deliver the promise and
# the call would otherwise never return.
#
# @param from [Time] start of the range; in proactive mode scheduling
#   starts from restore_checkpoint(from) instead
# @param to [Time] end of the range
def each(from = Time.gm(2015, 1, 1), to = Time.now)
  if @proactive
    real_from = restore_checkpoint(from)
    any_ready = Thread.promise
    scheduled = false

    @logger.info("Proactively scheduling download tasks...")
    each_time(real_from, to) do |current_time|
      scheduled = true
      # The pool hands the argument back to the task block; a distinct
      # name avoids shadowing the outer block parameter.
      @pool.process(current_time) do |task_time|
        cache(task_time)
        any_ready << true
        @logger.info("Proactively cached #{task_time}. Cache size: #{@cache.size}")
      end
    end

    # Block until some task has delivered, but only if a task exists.
    ~any_ready if scheduled
    @logger.info("Download tasks successfully scheduled!")
  end

  super
end
get(current_time)
click to toggle source
# File lib/gh-archive.rb, line 276
# Returns the content of the archive for +current_time+.
#
# In proactive mode the content comes from @cache, polling once per second
# until the file is present. Otherwise the archive is opened from
# data.gharchive.org and its stream is passed through read_gha_file.
# Connection resets and HTTP errors whose status starts with "5" are
# logged and retried, for at most @max_retries attempts in total.
#
# @param current_time [Object] passed to get_gha_filename and interpolated
#   into messages (presumably a Time -- confirm against callers)
# @raise [OpenURI::HTTPError] for HTTP errors that are not 5xx
# @raise [DownloadArchiveException] when no attempt succeeds
def get(current_time)
  @max_retries.times do
    begin
      archive_name = get_gha_filename(current_time)

      if @proactive
        @logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(archive_name)
        sleep 1 until @cache.has?(archive_name)
        return @cache.get(archive_name)
      end

      URI.open("http://data.gharchive.org/#{archive_name}") do |stream|
        return read_gha_file(stream)
      end
    rescue Errno::ECONNRESET => reset
      @logger.warn("A server error temporary prevented the download of #{current_time}: " + reset.message)
    rescue OpenURI::HTTPError => http_error
      status_code = http_error.io.status[0]
      # Anything other than a server-side failure is not retried.
      raise http_error unless status_code.start_with?("5")

      @logger.warn("A server error temporary prevented the download of #{current_time}: " + http_error.message)
    end
  end

  raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
end
max_retries(n)
click to toggle source
# File lib/gh-archive.rb, line 263
# Stores +n+ in @max_retries, the number of attempts used by the retry
# loops in #get and #cache.
#
# @param n [Integer] number of attempts
# @return [self] the receiver, so calls can be chained
def max_retries(n)
  @max_retries = n
  self
end
proactive(pool_size = 10)
click to toggle source
# File lib/gh-archive.rb, line 269
# Switches proactive mode on: sets @proactive to true and creates a
# GHArchive::ThreadPool of +pool_size+, kept in @pool.
#
# @param pool_size [Integer] size passed to GHArchive::ThreadPool.new
# @return [self] the receiver, so calls can be chained
def proactive(pool_size = 10)
  @proactive = true
  @pool = GHArchive::ThreadPool.new(pool_size)
  self
end