class HTMLProofer::Cache
Constants
- DEFAULT_CACHE_FILE_NAME
- DEFAULT_STORAGE_DIR
- URI_REGEXP
Attributes
cache_file[R]
cache_log[R]
exists[R]
storage_dir[R]
Public Class Methods
new(logger, options)
click to toggle source
# File lib/html-proofer/cache.rb, line 19 def initialize(logger, options) @logger = logger @cache_log = {} @cache_datetime = DateTime.now @cache_time = @cache_datetime.to_time if options.nil? || options.empty? define_singleton_method('use_cache?') { false } else define_singleton_method('use_cache?') { true } setup_cache!(options) @parsed_timeframe = parsed_timeframe(options[:timeframe]) end end
Public Instance Methods
add(url, filenames, status, msg = '')
click to toggle source
# File lib/html-proofer/cache.rb, line 66 def add(url, filenames, status, msg = '') return unless use_cache? data = { time: @cache_time, filenames: filenames, status: status, message: msg } @cache_log[clean_url(url)] = data end
clean_url(url)
click to toggle source
# File lib/html-proofer/cache.rb, line 155 def clean_url(url) unescape_url(url) end
detect_url_changes(found, type)
click to toggle source
# File lib/html-proofer/cache.rb, line 79 def detect_url_changes(found, type) found_urls = found.keys.map { |url| clean_url(url) } # if there were no urls, bail return {} if found_urls.empty? existing_urls = @cache_log.keys.map { |url| clean_url(url) } # prepare to add new URLs detected additions = found.reject do |url, _| url = clean_url(url) if existing_urls.include?(url) true else @logger.log :debug, "Adding #{url} to cache check" false end end new_link_count = additions.length new_link_text = pluralize(new_link_count, 'link', 'links') @logger.log :info, "Adding #{new_link_text} to the cache..." # remove from cache URLs that no longer exist deletions = 0 @cache_log.delete_if do |url, _| url = clean_url(url) if found_urls.include?(url) false elsif url_matches_type?(url, type) @logger.log :debug, "Removing #{url} from cache check" deletions += 1 true end end del_link_text = pluralize(deletions, 'link', 'links') @logger.log :info, "Removing #{del_link_text} from the cache..." additions end
load?()
click to toggle source
# File lib/html-proofer/cache.rb, line 129 def load? @load.nil? end
parsed_timeframe(timeframe)
click to toggle source
# File lib/html-proofer/cache.rb, line 49 def parsed_timeframe(timeframe) time, date = timeframe.match(/(\d+)(\D)/).captures time = time.to_i case date when 'M' time_ago(time, :months) when 'w' time_ago(time, :weeks) when 'd' time_ago(time, :days) when 'h' time_ago(time, :hours) else raise ArgumentError, "#{date} is not a valid timeframe!" end end
retrieve_urls(urls, type)
click to toggle source
# File lib/html-proofer/cache.rb, line 133 def retrieve_urls(urls, type) urls_to_check = detect_url_changes(urls, type) @cache_log.each_pair do |url, cache| next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip if url_matches_type?(url, type) urls_to_check[url] = cache['filenames'] # recheck expired links end end urls_to_check end
setup_cache!(options)
click to toggle source
# File lib/html-proofer/cache.rb, line 159 def setup_cache!(options) @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir) cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME @cache_file = File.join(storage_dir, cache_file_name) return unless File.exist?(@cache_file) contents = File.read(@cache_file) @cache_log = contents.empty? ? {} : JSON.parse(contents) end
size()
click to toggle source
# File lib/html-proofer/cache.rb, line 45 def size @cache_log.length end
unescape_url(url)
click to toggle source
FIXME: it seems that Typhoeus actually acts on escaped URLs, but there's no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig are not matched as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
# File lib/html-proofer/cache.rb, line 151 def unescape_url(url) Addressable::URI.unescape(url) end
urls()
click to toggle source
# File lib/html-proofer/cache.rb, line 41 def urls @cache_log['urls'] || [] end
within_timeframe?(time)
click to toggle source
# File lib/html-proofer/cache.rb, line 35 def within_timeframe?(time) return false if time.nil? (@parsed_timeframe..@cache_time).cover?(Time.parse(time)) end
write()
click to toggle source
TODO: Garbage performance–both the external and internal caches need access to this file. Write a proper versioned schema in the future
# File lib/html-proofer/cache.rb, line 125 def write File.write(cache_file, @cache_log.to_json) end
Private Instance Methods
time_ago(measurement, unit)
click to toggle source
# File lib/html-proofer/cache.rb, line 176 def time_ago(measurement, unit) case unit when :months @cache_datetime >> -measurement when :weeks @cache_datetime - measurement * 7 when :days @cache_datetime - measurement when :hours @cache_datetime - Rational(measurement / 24.0) end.to_time end
url_matches_type?(url, type)
click to toggle source
# File lib/html-proofer/cache.rb, line 189 def url_matches_type?(url, type) return true if type == :internal && url !~ URI_REGEXP return true if type == :external && url =~ URI_REGEXP end