class RuboCop::ResultCache
Provides functionality for caching RuboCop runs.
@api private
Constants
- DL_EXTENSIONS
- NON_CHANGING
Attributes
@api private
Public Class Methods
# File lib/rubocop/result_cache.rb, line 82
# Reads the `AllowSymlinksInCacheRootDirectory` setting from the all-cops
# section of the configuration that applies to the working directory.
#
# @param config_store [#for_pwd] object whose `for_pwd.for_all_cops` is
#   indexable by setting name
# @return [Object] the stored value, returned as-is (no boolean coercion)
def self.allow_symlinks_in_cache_location?(config_store)
  config_store.for_pwd.for_all_cops['AllowSymlinksInCacheRootDirectory']
end
# File lib/rubocop/result_cache.rb, line 76
# Resolves the cache root through `CacheConfig.root_dir`, handing it a block
# that yields the `CacheRootDirectory` setting of the working directory's
# all-cops configuration.
#
# @param config_store [#for_pwd] source of the configuration
# @return whatever `CacheConfig.root_dir` returns
def self.cache_root(config_store)
  CacheConfig.root_dir do
    config_store.for_pwd.for_all_cops['CacheRootDirectory']
  end
end
Remove old files so that the cache doesn’t grow too big. When the threshold MaxFilesInCache has been exceeded, the oldest 50% of all the files in the cache are removed.
The reason for removing so much is that cleaning should be done relatively seldom, since there is a slight risk that some other RuboCop process was just about to read the file, when there’s parallel execution and the cache is shared.
# File lib/rubocop/result_cache.rb, line 29
# Prunes the cache directory when it holds more files than the configured
# limit.
#
# @param config_store [#for_pwd] source of the configuration
# @param verbose [Boolean] forwarded to `remove_oldest_files`
# @param cache_root [String, nil] directory to clean; when nil it is looked
#   up with `cache_root(config_store)`
# @return [void]
def self.cleanup(config_store, verbose, cache_root = nil)
  return if inhibit_cleanup # OPTIMIZE: For faster testing

  # The parenthesised call with an argument resolves to the class method,
  # not to the local variable of the same name.
  cache_root ||= cache_root(config_store)
  return unless File.exist?(cache_root)

  # Find.find yields the root itself and every entry beneath it; regular
  # files go to `files`, everything else to `dirs`.
  files, dirs = Find.find(cache_root).partition { |path| File.file?(path) }
  return unless requires_file_removal?(files.length, config_store)

  remove_oldest_files(files, dirs, cache_root, verbose)
end
# File lib/rubocop/result_cache.rb, line 88
# Sets up the cache entry for one inspected file.
#
# The cache root is taken from, in order of precedence: the `cache_root`
# argument, `options[:cache_root]`, then `ResultCache.cache_root`.
#
# @param file [String] path of the inspected file
# @param team [Object] passed to `context_checksum`
# @param options [Hash] invocation options; `:cache_root` and `:debug` are
#   read here
# @param config_store [Object] source of the configuration
# @param cache_root [String, nil] explicit cache root override
def initialize(file, team, options, config_store, cache_root = nil)
  cache_root ||= options[:cache_root]
  cache_root ||= ResultCache.cache_root(config_store)
  @allow_symlinks_in_cache_location =
    ResultCache.allow_symlinks_in_cache_location?(config_store)
  # Entry path layout: <root>/<rubocop checksum>/<context checksum>/<file checksum>
  @path = File.join(cache_root,
                    rubocop_checksum,
                    context_checksum(team, options),
                    file_checksum(file, config_store))
  @cached_data = CachedData.new(file)
  @debug = options[:debug]
end
Private Class Methods
# File lib/rubocop/result_cache.rb, line 66
# Deletes the first `remove_count` entries of `files`, then removes every
# directory in `dirs` that is empty afterwards.
#
# @param files [Array<String>] file paths, in the order they should be removed
# @param dirs [Array<String>] candidate directories to prune
# @param remove_count [Integer] number of leading entries of `files` to delete
# @return [Array<String>] `dirs`
def remove_files(files, dirs, remove_count)
  # Batch file deletions, deleting over 130,000+ files will crash
  # File.delete.
  files[0, remove_count].each_slice(10_000) do |files_slice|
    File.delete(*files_slice)
  end

  # Dir.empty? also sees hidden entries, which a "dir/*" glob skips; relying
  # on the glob made Dir.rmdir raise Errno::ENOTEMPTY for directories that
  # only contain dotfiles.
  dirs.each { |dir| Dir.rmdir(dir) if Dir.empty?(dir) }
end
# File lib/rubocop/result_cache.rb, line 53
# Removes the oldest files (by modification time) from the cache and prunes
# directories through `remove_files`.
#
# @param files [Array<String>] all cache file paths
# @param dirs [Array<String>] directories handed on to `remove_files`
# @param cache_root [String] only used in the verbose message
# @param verbose [Boolean] when truthy, progress and rescued errors are
#   printed to standard output
# @return [void]
def remove_oldest_files(files, dirs, cache_root, verbose)
  # Add 1 to half the number of files, so that we remove the file if
  # there's only 1 left.
  remove_count = 1 + (files.length / 2)

  puts "Removing the #{remove_count} oldest files from #{cache_root}" if verbose
  sorted = files.sort_by { |path| File.mtime(path) }
  remove_files(sorted, dirs, remove_count)
rescue Errno::ENOENT => e
  # This can happen if parallel RuboCop invocations try to remove the
  # same files. No problem.
  # The exception is bound explicitly so the message is printed even when
  # the English library ($ERROR_INFO) has not been loaded.
  puts e if verbose
end
# File lib/rubocop/result_cache.rb, line 49
# Tells whether the cache holds enough files to warrant a clean-up: more than
# one file and more than the `MaxFilesInCache` setting.
#
# @param file_count [Integer] number of files currently in the cache
# @param config_store [#for_pwd] source of the configuration
# @return [Boolean]
def requires_file_removal?(file_count, config_store)
  file_count > 1 &&
    file_count > config_store.for_pwd.for_all_cops['MaxFilesInCache']
end
Public Instance Methods
# File lib/rubocop/result_cache.rb, line 101
# @return [Object] the value of `@debug`, returned as-is
def debug?
  @debug
end
# File lib/rubocop/result_cache.rb, line 109
# Reads the cache file at `@path` as UTF-8 and hands its contents to
# `@cached_data.from_json`.
#
# @return whatever `@cached_data.from_json` returns
def load
  puts "Loading cache from #{@path}" if debug?
  @cached_data.from_json(File.read(@path, encoding: Encoding::UTF_8))
end
# File lib/rubocop/result_cache.rb, line 114
# Serialises `offenses` with `@cached_data.to_json` and stores the result at
# `@path`.
#
# Nothing is written when the cache directory cannot be created (a warning is
# emitted) or when symlink protection is triggered for that directory.
#
# @param offenses [Object] passed to `@cached_data.to_json`
# @return [void]
def save(offenses)
  dir = File.dirname(@path)

  begin
    FileUtils.mkdir_p(dir)
  rescue Errno::EACCES, Errno::EROFS => e
    warn "Couldn't create cache directory. Continuing without cache.\n #{e.message}"
    return
  end

  preliminary_path = "#{@path}_#{rand(1_000_000_000)}"
  # RuboCop must be in control of where its cached data is stored. A
  # symbolic link anywhere in the cache directory tree can be an
  # indication that a symlink attack is being waged.
  return if symlink_protection_triggered?(dir)

  File.open(preliminary_path, 'w', encoding: Encoding::UTF_8) do |f|
    f.write(@cached_data.to_json(offenses))
  end
  # The preliminary path is used so that if there are multiple RuboCop
  # processes trying to save data for the same inspected file
  # simultaneously, the only problem we run in to is a competition who gets
  # to write to the final file. The contents are the same, so no corruption
  # of data should occur.
  FileUtils.mv(preliminary_path, @path)
end
# File lib/rubocop/result_cache.rb, line 105
# @return [Boolean] whether something exists on disk at `@path`
def valid?
  File.exist?(@path)
end
Private Instance Methods
# File lib/rubocop/result_cache.rb, line 147
# Walks from `path` up through its parent directories and reports whether any
# component on the way is a symbolic link. A warning naming the first symlink
# found is emitted.
#
# @param path [String] path to inspect
# @return [Boolean] true as soon as a symlink is found, false otherwise
def any_symlink?(path)
  # File.dirname returns its argument unchanged at the top of the tree
  # ("/" or "."), which ends the walk; that topmost entry is not checked.
  while path != File.dirname(path)
    if File.symlink?(path)
      warn "Warning: #{path} is a symlink, which is not allowed."
      return true
    end

    path = File.dirname(path)
  end

  false
end
We combine team and options into a single “context” checksum to avoid making file names that are too long for some filesystems to handle. This context is for anything that’s not (1) the RuboCop executable checksum or (2) the inspected file checksum.
# File lib/rubocop/result_cache.rb, line 233
# Builds one SHA1 hex digest out of the team checksum and the digest of the
# relevant options, concatenated in that order.
#
# @param team [Object] passed to `team_checksum`
# @param options [Hash] passed to `relevant_options_digest`
# @return [String] hexadecimal SHA1 digest
def context_checksum(team, options)
  Digest::SHA1.hexdigest(
    [team_checksum(team), relevant_options_digest(options)].join
  )
end
# File lib/rubocop/result_cache.rb, line 190
# Computes a CRC32-based fingerprint for one source file.
#
# @param path [String] file to fingerprint
# @return [String] decimal string of the CRC32 of the file's bytes, or of its
#   base name when the path ends with one of `DL_EXTENSIONS`
def digest(path)
  content = if path.end_with?(*DL_EXTENSIONS)
              # Shared libraries often contain timestamps of when
              # they were compiled and other non-stable data.
              File.basename(path)
            else
              File.binread(path) # mtime not reliable
            end
  Zlib.crc32(content).to_s
end
# File lib/rubocop/result_cache.rb, line 158
# Computes a SHA1 hex digest covering the file's path, its mode bits, the
# signature of the configuration that applies to it, and its contents.
#
# @param file [String] path of the inspected file
# @param config_store [#for_file] returns an object responding to `signature`
# @return [String] the hex digest, or '_' when the file does not exist
def file_checksum(file, config_store)
  digester = Digest::SHA1.new
  mode = File.stat(file).mode
  digester.update("#{file}#{mode}#{config_store.for_file(file).signature}")
  digester.file(file)
  digester.hexdigest
rescue Errno::ENOENT
  # Spurious files that come and go should not cause a crash, at least not
  # here.
  '_'
end
Return a hash of the options given at invocation, minus the ones that have no effect on which offenses and disabled line ranges are found, and thus don’t affect caching.
# File lib/rubocop/result_cache.rb, line 217
# Drops every option whose key is listed in `NON_CHANGING`, stringifies what
# is left, and replaces each run of non-letter characters with one underscore.
#
# @param options [Hash] options given at invocation
# @return [String] string containing only ASCII letters and underscores
def relevant_options_digest(options)
  options = options.reject { |key, _| NON_CHANGING.include?(key) }
  options.to_s.gsub(/[^a-z]+/i, '_')
end
The checksum of the RuboCop program running the inspection.
# File lib/rubocop/result_cache.rb, line 175
# Returns the memoised `ResultCache.source_checksum`, computing it on first
# use from the per-file digests of `rubocop_extra_features` (existing files
# only, in sorted order) followed by the RuboCop and RuboCop::AST version
# strings.
#
# @return [String] hexadecimal SHA1 digest
def rubocop_checksum
  ResultCache.source_checksum ||=
    begin
      sha = Digest::SHA1.new
      rubocop_extra_features
        .select { |path| File.file?(path) }
        .sort!
        .each { |path| sha << digest(path) }
      sha << RuboCop::Version::STRING << RuboCop::AST::Version::STRING
      sha.hexdigest
    end
end
# File lib/rubocop/result_cache.rb, line 201
# Lists the source files whose contents feed the RuboCop checksum.
#
# @return [Array<String>] loaded feature paths plus everything found under
#   the exe directory, minus `ResultCache.rubocop_required_features`
def rubocop_extra_features
  lib_root = File.join(File.dirname(__FILE__), '..')
  exe_root = File.join(lib_root, '..', 'exe')

  # These are all the files we have `require`d plus everything in the
  # exe directory. A change to any of them could affect the cop output
  # so we include them in the cache hash.
  source_files = $LOADED_FEATURES + Find.find(exe_root).to_a
  source_files -= ResultCache.rubocop_required_features # Rely on gem versions

  source_files
end
# File lib/rubocop/result_cache.rb, line 143
# Tells whether writing below `path` must be refused because of a symlink.
# The path is inspected only when `@allow_symlinks_in_cache_location` is
# falsy.
#
# @param path [String] directory about to be written to
# @return [Boolean]
def symlink_protection_triggered?(path)
  !@allow_symlinks_in_cache_location && any_symlink?(path)
end
The external dependency checksums are cached per RuboCop team so that the checksums don’t need to be recomputed for each file.
# File lib/rubocop/result_cache.rb, line 224
# Returns `team.external_dependency_checksum`, memoised per team object.
#
# @param team [#external_dependency_checksum]
# @return the team's external dependency checksum
def team_checksum(team)
  # Keyed by object identity, so two equal-looking teams are cached
  # separately.
  @checksum_by_team ||= {}.compare_by_identity
  @checksum_by_team[team] ||= team.external_dependency_checksum
end