class NewspaperWorks::ResourceFetcher
In-memory caching fetcher for HTTP GET requests; wraps Faraday.get.
Constants
- CACHEABLE_STATUS
Only responses with the following HTTP status codes are cached, per RFC 7231, Section 6.1.
Attributes
cache[RW]
Public Class Methods
get(url, stale_after = 3600)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 13
# Class-level convenience: builds a fetcher with the given staleness
# threshold and fetches the URL through it.
#
# @param url [String] URL to GET
# @param stale_after [Integer] seconds before a cached record is considered expired
# @return [Hash] result of the instance-level #get for the URL
def self.get(url, stale_after = 3600)
  fetcher = new(stale_after)
  fetcher.get(url)
end
include?(url)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 17
# Reports whether the shared cache currently holds a record for the URL.
# Does not evaluate expiry; a stale record still counts as present.
#
# @param url [String] URL used as the cache key
# @return [Boolean] false when the cache has not been initialized yet
def self.include?(url)
  return false if cache.nil?
  # Hash#key? is a direct lookup; avoids building and scanning a key array
  cache.key?(url)
end
new(stale_after = 3600)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 22
# @param stale_after [Integer] seconds before a cached record is considered expired
def initialize(stale_after = 3600)
  # staleness threshold, in seconds
  @stale_after = stale_after
  klass = self.class
  # the cache is shared class-level state: create it only when missing
  klass.cache = {} if klass.cache.nil?
end
Public Instance Methods
cache()
click to toggle source
@return [Hash] shared cache state
# File lib/newspaper_works/resource_fetcher.rb, line 33
# Instance-level view of the class-level cache.
#
# @return [Hash] shared cache state
def cache
  owner = self.class
  owner.cache
end
cache_get(url)
click to toggle source
@return [NilClass, Hash] hash of status, response body — or nil if no HIT
# File lib/newspaper_works/resource_fetcher.rb, line 38
# Looks up a URL in the shared cache, evicting it first if it has expired.
#
# @param url [String] URL used as the cache key
# @return [NilClass, Hash] cached record, or nil when there is no HIT
def cache_get(url)
  return nil unless cache.include?(url)
  # may delete the record, so membership has to be tested again afterwards
  check_expiry(url)
  cache.include?(url) ? cache[url] : nil
end
check_expiry(url)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 67
# Removes the cached record for a URL when it is present and expired.
#
# @param url [String] URL used as the cache key
# @return [NilClass, Hash] the evicted record, or nil when nothing was removed
def check_expiry(url)
  return unless cache.include?(url) && expired(cache[url])
  cache.delete(url)
end
expired(record)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 72
# Decides whether a cached record is older than this fetcher's threshold.
#
# @param record [Hash] cached record; its 'cached_time' entry is read as
#   epoch seconds
# @return [Boolean] true when more than @stale_after seconds have elapsed
#   since the record was stored
def expired(record)
  # Time is core Ruby and yields the same epoch seconds as the legacy
  # DateTime.now.to_time.to_i round-trip
  now = Time.now.to_i
  # does elapsed seconds between store and now exceed threshold?
  (now - record['cached_time']) > @stale_after
end
get(url)
click to toggle source
# File lib/newspaper_works/resource_fetcher.rb, line 28
# Returns the cached record for a URL, fetching from the source on a MISS.
#
# @param url [String] URL to GET
# @return [Hash] cached or freshly fetched result
def get(url)
  hit = cache_get(url)
  return hit if hit
  miss_get(url)
end
miss_get(url)
click to toggle source
Fetch the resource from its original source by URL; any cacheable response
is stored in self.class.cache (shared state).
@param url [String] URL to GET
@raise [Faraday::ConnectionFailed] if DNS or TCP connection error.
@return [Hash] hash containing status, response headers, response body
# File lib/newspaper_works/resource_fetcher.rb, line 52
# Fetches a URL from its source and caches the result when the response
# status is listed in CACHEABLE_STATUS.
#
# @param url [String] URL to GET
# @return [Hash] response headers plus 'status' and 'body' entries; cached
#   results additionally carry 'cached_time' (epoch seconds)
def miss_get(url)
  resp = Faraday.get url
  # create a new hash from headers
  result = resp.headers.to_h
  # 'status' and 'body' are assigned after the headers, so they overwrite
  # any header entries stored under those exact keys
  result['status'] = resp.status
  result['body'] = resp.body
  # set (new or replaced previously) cached value for URL:
  if CACHEABLE_STATUS.include?(resp.status)
    # Time is core Ruby; same epoch seconds as DateTime.now.to_time.to_i
    result['cached_time'] = Time.now.to_i
    cache[url] = result
  end
  result
end