class NewspaperWorks::ResourceFetcher

In-memory caching fetcher for HTTP GET requests; wraps Faraday.get.

Constants

CACHEABLE_STATUS

Only responses with the following HTTP status codes are cached, per RFC 7231, Section 6.1.

Attributes

cache[RW]

Public Class Methods

get(url, stale_after = 3600) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 13
# Class-level shortcut: build a fetcher with the given staleness threshold
# and GET the URL through it.
#
# @param url [String] URL to GET
# @param stale_after [Integer] threshold handed to the constructor
# @return [Object] whatever the instance-level #get returns for the URL
def self.get(url, stale_after = 3600)
  fetcher = new(stale_after)
  fetcher.get(url)
end
include?(url) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 17
# Report whether a URL currently has an entry in the shared cache.
# No expiry check is performed here; this only looks at key presence.
#
# @param url [String] URL to look up
# @return [Boolean] true if the cache exists and has the URL as a key
def self.include?(url)
  store = cache
  # the cache is only created by the first instance; before that it is nil
  return false if store.nil?
  # direct key lookup instead of building and scanning a keys array
  store.key?(url)
end
new(stale_after = 3600) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 22
# Set up a fetcher with its own staleness threshold, creating the
# class-level cache on first use.
#
# @param stale_after [Integer] seconds after which a cached entry is stale
def initialize(stale_after = 3600)
  @stale_after = stale_after # seconds
  klass = self.class
  # the cache is shared at class level; only create it when none exists yet
  klass.cache = {} if klass.cache.nil?
end

Public Instance Methods

cache() click to toggle source

@return [Hash] shared cache state

# File lib/newspaper_works/resource_fetcher.rb, line 33
# Instance-level accessor for the cache held on the class.
#
# @return [Hash] shared cache state
def cache
  klass = self.class
  klass.cache
end
cache_get(url) click to toggle source

@return [NilClass, Hash] hash of status, response body — or nil if no HIT

# File lib/newspaper_works/resource_fetcher.rb, line 38
# Look a URL up in the cache, evicting it first if it has gone stale.
#
# @param url [String] URL to look up
# @return [NilClass, Hash] the cached record, or nil when there is no
#   entry or the entry was just evicted as expired
def cache_get(url)
  store = cache
  return nil unless store.include?(url)
  # may remove the entry from the cache when it is expired
  check_expiry(url)
  # only a still-present entry counts as a hit
  store.include?(url) ? store[url] : nil
end
check_expiry(url) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 67
# Evict the cached record for a URL when it is expired.
#
# @param url [String] URL whose cache entry should be checked
# @return [NilClass, Hash] the removed record when an eviction happened,
#   otherwise nil
def check_expiry(url)
  store = cache
  return nil unless store.include?(url)
  return nil unless expired(store[url])
  store.delete(url)
end
expired(record) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 72
# Decide whether a cached record is older than this fetcher's threshold.
#
# @param record [Hash] cached entry; its 'cached_time' value is read as
#   epoch seconds
# @return [Boolean] true when more than @stale_after seconds have elapsed
#   since the record was stored
def expired(record)
  # Time.now gives epoch seconds directly, without going through DateTime
  now = Time.now.to_i
  # does elapsed seconds between store and now exceed threshold?
  (now - record['cached_time']) > @stale_after
end
get(url) click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 28
# GET a URL, preferring a non-expired cached record and falling back to
# the original source on a miss.
#
# @param url [String] URL to GET
# @return [Hash] the cached record, or the freshly fetched result
def get(url)
  cached = cache_get(url)
  return cached if cached
  miss_get(url)
end
miss_get(url) click to toggle source

Get a URL from its original source; any cacheable response is stored

in self.class.cache (shared state).

@param url [String] URL to GET
@raise [Faraday::ConnectionFailed] on a DNS or TCP connection error
@return [Hash] hash containing the status, response headers, and response body

# File lib/newspaper_works/resource_fetcher.rb, line 52
# Fetch a URL from its original source with Faraday.get, bypassing any
# cached copy, and store the result in the shared cache when the response
# status is listed in CACHEABLE_STATUS.
#
# @param url [String] URL to GET
# @raise [Faraday::ConnectionFailed] on a DNS or TCP connection error
# @return [Hash] response headers plus 'status' and 'body' keys; cached
#   results additionally carry 'cached_time' (epoch seconds)
def miss_get(url)
  resp = Faraday.get(url)
  # start from the response headers, then layer status and body on top
  result = resp.headers.to_h
  result['status'] = resp.status
  result['body'] = resp.body
  # set (new or replaced previously) cached value for URL:
  if CACHEABLE_STATUS.include?(resp.status)
    # epoch seconds taken from Time directly, without going through DateTime
    result['cached_time'] = Time.now.to_i
    cache[url] = result
  end
  result
end