class SiteDiff::Fetch
SiteDiff
Data Fetcher. TODO: Rename this to Fetcher.
Public Class Methods
new(cache, paths, interval, concurrency = 3, curl_opts = nil, debug = true, **tags)
click to toggle source
Cache is a cache object (see sitediff/cache). Paths is a list of sub-paths. Tags is a hash of tag names => base URLs.
# File lib/sitediff/fetch.rb, line 13
# Build a fetcher. Nothing is fetched here; the arguments are only stored.
#
# cache       - cache object (see sitediff/cache).
# paths       - list of sub-paths to fetch.
# interval    - delay, in milliseconds, applied after each live fetch.
# concurrency - max_concurrency handed to Typhoeus::Hydra (default 3).
# curl_opts   - curl options; UriWrapper::DEFAULT_CURL_OPTS is used when
#               this is nil (or false).
# debug       - flag forwarded to each UriWrapper (default true).
# tags        - hash of tag names => base URLs.
def initialize(cache, paths, interval, concurrency = 3, curl_opts = nil, debug = true, **tags)
  # What to fetch, and where results are stored.
  @paths = paths
  @tags = tags
  @cache = cache

  # How to fetch it.
  @concurrency = concurrency
  @interval = interval
  @curl_opts = curl_opts || UriWrapper::DEFAULT_CURL_OPTS
  @debug = debug
end
Public Instance Methods
run(&block)
click to toggle source
Fetch all the paths, once per tag. When a path has been fetched for every tag, the block will be called with the path and a hash of tag => UriWrapper::ReadResult objects.
# File lib/sitediff/fetch.rb, line 32
# Fetch every path, once per tag.
#
# The block is remembered as @callback. It is invoked with a path and a
# hash of tag => result once that path has a result for every tag.
# Returns whatever Typhoeus::Hydra#run returns.
def run(&block)
  @callback = block
  @hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)

  # Enqueue all the work first, then let the hydra run it.
  @paths.each do |sub_path|
    queue_path(sub_path)
  end

  @hydra.run
end
Private Instance Methods
process_results(path, results)
click to toggle source
Process fetch results
# File lib/sitediff/fetch.rb, line 70
# Process fetch results.
#
# Hands +path+ and +results+ (a hash of tag => result) to the callback once
# +results+ holds one entry per configured tag. Does nothing while results
# for some tags are still outstanding.
#
# Returns the callback's return value, or nil when nothing was called.
def process_results(path, results)
  # Not every tag has reported for this path yet.
  return unless results.size == @tags.size

  # @callback is the block given to #run. It is nil when #run was called
  # without a block; treat that as "no notification wanted" rather than
  # failing with NoMethodError after the fetch work has already been done.
  @callback.call(path, results) if @callback
end
queue_path(path)
click to toggle source
Queue a path for fetching
# File lib/sitediff/fetch.rb, line 42
# Queue a path for fetching.
#
# For each tag => base URL pair, one of three things happens:
# * the cache already holds a result for (tag, path): it is used as-is;
# * there is no base URL for the tag: an error result is recorded;
# * otherwise a UriWrapper for base + path is queued on @hydra, and its
#   result is cached and recorded when the queue block is invoked.
#
# After each recorded result, process_results is called with the results
# collected so far for this path.
def queue_path(path)
  results = {}
  @tags.each do |tag, base|
    if (res = @cache.get(tag, path))
      results[tag] = res
      process_results(path, results)
    elsif !base
      # We only have the cache, but this item isn't cached!
      results[tag] = UriWrapper::ReadResult.error('Not cached')
      process_results(path, results)
    else
      uri = UriWrapper.new(base + path, @curl_opts, @debug)
      uri.queue(@hydra) do |resl|
        # Insert delay to limit fetching rate. Only a positive interval is
        # meaningful: nil would fail on the division below and a negative
        # value would make sleep raise, so both are treated as "no delay".
        if @interval && @interval > 0
          SiteDiff.log("Waiting #{@interval} milliseconds.", :info)
          sleep(@interval / 1000.0)
        end
        @cache.set(tag, path, resl)
        results[tag] = resl
        process_results(path, results)
      end
    end
  end
end