class OnlineGHAProvider

Public Class Methods

new(max_retries = 3, proactive = false, proactive_pool_size = 10)
Calls superclass method GHAProvider::new
# File lib/gh-archive.rb, line 254
# Creates the provider and its download cache.
#
# max_retries::         value handed to #max_retries
# proactive::           when truthy, #proactive is called to enable proactive mode
# proactive_pool_size:: pool size handed to #proactive; unused unless +proactive+ is truthy
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
    super()

    # The explicit receiver keeps the setter calls visually distinct from the
    # parameters that share their names.
    self.max_retries(max_retries)
    if proactive
        self.proactive(proactive_pool_size)
    end

    @cache = Cache.new
end

Public Instance Methods

cache(current_time)
# File lib/gh-archive.rb, line 311
# Downloads the archive for +current_time+ and stores its content in @cache
# under the archive's filename.
#
# While @cache reports itself full, the method polls it once per second. It
# then makes up to @max_retries download attempts:
# * Errno::ECONNRESET and HTTP errors whose status starts with "5" are logged
#   as warnings and the attempt is repeated;
# * any other OpenURI::HTTPError is re-raised to the caller.
#
# When every attempt fails with a retryable error the method returns without
# raising and without putting anything in the cache.
def cache(current_time)
    @logger.info("Full cache. Waiting for some free slot...") if @cache.full?
    sleep 1 while @cache.full?

    @max_retries.times do
        begin
            archive_name = self.get_gha_filename(current_time)
            URI.open("http://data.gharchive.org/#{archive_name}") do |stream|
                @cache.put(archive_name, self.read_gha_file(stream))
                return
            end
        rescue Errno::ECONNRESET => reset_error
            # Falling off the end of the block moves on to the next attempt.
            @logger.warn("A server error temporary prevented the download of #{current_time}: " + reset_error.message)
        rescue OpenURI::HTTPError => http_error
            status_code = http_error.io.status[0]
            # Only 5xx responses are treated as transient.
            raise http_error unless status_code.start_with?("5")

            @logger.warn("A server error temporary prevented the download of #{current_time}: " + http_error.message)
        end
    end
end
each(from = Time.gm(2015, 1, 1), to = Time.now)
Calls superclass method GHAProvider#each
# File lib/gh-archive.rb, line 339
# Iterates over the archives between +from+ and +to+ by delegating to the
# superclass implementation.
#
# In proactive mode (@proactive set), a download task is first submitted to
# @pool for every time yielded by #each_time, starting from the value returned
# by restore_checkpoint(from). The method then waits until at least one task
# has finished caching before delegating.
#
# from:: start of the range (defaults to 2015-01-01 00:00 UTC)
# to::   end of the range (defaults to the current time)
#
# The bare +super+ forwards +from+ and +to+ exactly as received.
def each(from = Time.gm(2015, 1, 1), to = Time.now)
    if @proactive
        real_from = restore_checkpoint(from)
        any_ready = Thread.promise
        scheduled = false

        @logger.info("Proactively scheduling download tasks...")
        self.each_time(real_from, to) do |current_time|
            scheduled = true
            # The task receives the time as its own argument; it is named
            # differently so it does not shadow the outer block parameter.
            @pool.process(current_time) do |task_time|
                cache(task_time)
                any_ready << true
                @logger.info("Proactively cached #{task_time}. Cache size: #{@cache.size}")
            end
        end

        # Wait for the first cached archive, but only if something was
        # scheduled: with an empty range nothing would ever deliver the
        # promise and the wait would never end.
        # NOTE(review): a task whose cache call raises never delivers either;
        # if that happens to every task the wait presumably still blocks.
        ~any_ready if scheduled
        @logger.info("Download tasks successfully scheduled!")
    end

    super
end
get(current_time)
# File lib/gh-archive.rb, line 276
# Returns the content of the archive for +current_time+.
#
# In proactive mode (@proactive set) the method polls @cache once per second
# until it holds the archive's filename, then returns the cached content.
# Otherwise the archive is downloaded and passed through #read_gha_file.
#
# Up to @max_retries attempts are made. Errno::ECONNRESET and HTTP errors whose
# status starts with "5" are logged as warnings and retried; any other
# OpenURI::HTTPError is re-raised.
#
# Raises DownloadArchiveException when no attempt succeeds.
def get(current_time)
    @max_retries.times do
        begin
            archive_name = self.get_gha_filename(current_time)

            if @proactive
                @logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(archive_name)
                sleep 1 until @cache.has?(archive_name)

                return @cache.get(archive_name)
            end

            URI.open("http://data.gharchive.org/#{archive_name}") do |stream|
                return self.read_gha_file(stream)
            end
        rescue Errno::ECONNRESET => reset_error
            # Falling off the end of the block moves on to the next attempt.
            @logger.warn("A server error temporary prevented the download of #{current_time}: " + reset_error.message)
        rescue OpenURI::HTTPError => http_error
            status_code = http_error.io.status[0]
            # Only 5xx responses are treated as transient.
            raise http_error unless status_code.start_with?("5")

            @logger.warn("A server error temporary prevented the download of #{current_time}: " + http_error.message)
        end
    end

    raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
end
max_retries(n)
# File lib/gh-archive.rb, line 263
# Sets the number of download attempts made before giving up.
#
# n:: value stored in @max_retries
#
# Returns the receiver, so calls can be chained.
def max_retries(n)
    @max_retries = n
    self
end
proactive(pool_size = 10)
# File lib/gh-archive.rb, line 269
# Turns on proactive mode and creates the thread pool stored in @pool.
#
# pool_size:: argument passed to GHArchive::ThreadPool.new (defaults to 10)
#
# Returns the receiver, so calls can be chained.
def proactive(pool_size = 10)
    @proactive = true
    @pool = GHArchive::ThreadPool.new(pool_size)
    self
end