class SiteDiff::Fetch

SiteDiff Data Fetcher. TODO: Rename this to Fetcher.

Public Class Methods

new(cache, paths, interval, concurrency = 3, curl_opts = nil, debug = true, **tags) click to toggle source

Cache is a cache object (see sitediff/cache). Paths is a list of sub-paths. Tags is a hash of tag names => base URLs.

# File lib/sitediff/fetch.rb, line 13
# Set up a fetcher for a list of paths across one or more tagged sites.
#
# cache       - cache object queried with get(tag, path) and updated with
#               set(tag, path, result); see sitediff/cache.
# paths       - list of sub-paths to fetch.
# interval    - delay, in milliseconds, applied after each fetched response.
# concurrency - maximum number of concurrent requests (default 3).
# curl_opts   - curl options handed to UriWrapper; when nil or false,
#               UriWrapper::DEFAULT_CURL_OPTS is used.
# debug       - debug flag handed to UriWrapper (default true).
# tags        - hash of tag names => base URLs.
def initialize(cache, paths, interval,
               concurrency = 3, curl_opts = nil, debug = true,
               **tags)
  # What to fetch, and where fetched results are stored.
  @tags = tags
  @paths = paths
  @cache = cache

  # How to fetch it.
  @concurrency = concurrency
  @interval = interval
  @debug = debug
  @curl_opts = curl_opts || UriWrapper::DEFAULT_CURL_OPTS
end

Public Instance Methods

run(&block) click to toggle source

Fetch all the paths, once per tag. When a path has been fetched for every tag, block will be called with the path, and a hash of tag => UriWrapper::ReadResult objects.

# File lib/sitediff/fetch.rb, line 32
# Fetch all the paths, once per tag.
#
# The given block is stored as the callback; when a path has been fetched
# for every tag, it is called with the path and a hash of
# tag => UriWrapper::ReadResult objects.
#
# Returns whatever the Typhoeus hydra's run returns.
def run(&block)
  @callback = block
  @hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)

  # Queue every path first; the hydra then performs the queued requests.
  @paths.each do |sub_path|
    queue_path(sub_path)
  end

  @hydra.run
end

Private Instance Methods

process_results(path, results) click to toggle source

Process fetch results

# File lib/sitediff/fetch.rb, line 70
# Hand a path's results to the callback once every tag has reported.
#
# path    - the path the results belong to.
# results - hash of tag => result gathered so far for that path.
#
# Returns nil while the number of results differs from the number of tags;
# otherwise returns the callback's return value.
def process_results(path, results)
  all_tags_fetched = results.size == @tags.size
  @callback[path, results] if all_tags_fetched
end
queue_path(path) click to toggle source

Queue a path for fetching

# File lib/sitediff/fetch.rb, line 42
# Queue a path for fetching, once per tag.
#
# For each tag => base pair in @tags:
# - if the cache already holds a result for (tag, path), it is used as-is;
# - if there is no base URL and nothing is cached, a 'Not cached' error
#   result is recorded;
# - otherwise base + path is queued on @hydra, and the response is written
#   to the cache when it arrives.
# After every recorded result, process_results is called with the results
# gathered so far for this path.
#
# path - the sub-path to fetch; it is appended to each tag's base URL.
def queue_path(path)
  results = {}

  @tags.each do |tag, base|
    if (res = @cache.get(tag, path))
      results[tag] = res
      process_results(path, results)
    elsif !base
      # We only have the cache, but this item isn't cached!
      results[tag] = UriWrapper::ReadResult.error('Not cached')
      process_results(path, results)
    else
      uri = UriWrapper.new(base + path, @curl_opts, @debug)
      uri.queue(@hydra) do |resl|
        # Insert delay to limit fetching rate. Only a positive interval
        # delays: nil cannot be divided and a negative duration makes
        # sleep raise ArgumentError, so both are treated as "no delay".
        if @interval && @interval > 0
          SiteDiff.log("Waiting #{@interval} milliseconds.", :info)
          sleep(@interval / 1000.0)
        end
        @cache.set(tag, path, resl)
        results[tag] = resl
        process_results(path, results)
      end
    end
  end
end