class Fingerprint::Scanner

The scanner class can scan a set of directories and produce an index.

Attributes

digests[R]
recordset[R]

Public Class Methods

new(roots, pwd: Dir.pwd, **options) click to toggle source

Initialize the scanner to scan a given set of directories in order.

options[:excludes]

An array of regular expressions; any path that matches one of them is not indexed.

options[:output]

An IO where the results will be written.

# File lib/fingerprint/scanner.rb, line 47
# Set up a scanner over the given roots.
#
# roots   - paths to scan; each one is expanded relative to +pwd+.
# pwd     - base directory used to expand relative roots (defaults to Dir.pwd).
# options - remaining keyword options, stored in @options. This method reads
#           :excludes (patterns, empty by default) and :checksums (names looked
#           up in CHECKSUMS; DEFAULT_CHECKSUMS is used when missing or empty).
def initialize(roots, pwd: Dir.pwd, **options)
        @roots = roots.map { |root| File.expand_path(root, pwd) }

        @options = options
        @excludes = options[:excludes] || []

        @progress = nil

        # Fall back to the default checksum list when none were requested.
        requested = @options[:checksums]
        has_checksums = requested && requested.size > 0
        @options[:checksums] = DEFAULT_CHECKSUMS unless has_checksums

        # One digest per checksum name, created by the callable stored under that name in CHECKSUMS.
        @digests = {}
        @options[:checksums].each { |name| @digests[name] = CHECKSUMS[name].call }

        @callback = nil
end
scan_paths(paths, **options) click to toggle source

A helper function to scan a set of directories.

# File lib/fingerprint/scanner.rb, line 326
# Scan +paths+ and return the record set that received the results.
#
# options[:output]    - an IO for the results. When given, the record set (the
#                       one passed in, or a new RecordSet) is wrapped in a
#                       RecordSetPrinter built with that IO.
# options[:recordset] - the record set to fill. When neither this nor :output is
#                       given, a new RecordSet is created so the scan always has
#                       somewhere to put its records.
#
# All options are forwarded to Scanner.new.
def self.scan_paths(paths, **options)
        if options[:output]
                recordset = options.key?(:recordset) ? options[:recordset] : RecordSet.new

                options[:recordset] = RecordSetPrinter.new(recordset, options[:output])
        end

        # Scanning into nil would fail on the first appended record.
        options[:recordset] ||= RecordSet.new

        scanner = Scanner.new(paths, **options)

        scanner.scan(options[:recordset])

        return options[:recordset]
end

Public Instance Methods

excluded?(path) click to toggle source

Returns true if the given path should be excluded.

# File lib/fingerprint/scanner.rb, line 205
# Tell whether +path+ matches at least one of the patterns in @excludes.
#
# Returns true on the first match, false when nothing matches (including when
# there are no patterns at all).
def excluded?(path)
        return @excludes.any? { |pattern| path.match(pattern) }
end
scan(recordset) click to toggle source

Run the scanning process.

# File lib/fingerprint/scanner.rb, line 228
# Run the scanning process over every root, appending the results to +recordset+.
#
# For each root a configuration header is appended, followed by one record per
# directory, symlink and regular file found beneath it; a summary record is
# appended at the very end. Excluded or unsupported entries are only recorded
# when @options[:verbose] is set. When @options[:progress] is set, a preliminary
# pass totals the work and progress lines are written to $stderr.
#
# Returns the +recordset+ that was passed in.
def scan(recordset)
        # Counters reported in the final summary record.
        excluded_count = 0
        processed_count = 0
        processed_size = 0
        directory_count = 0

        # Totals gathered by the estimation pass; only used for progress output.
        total_count = 0
        total_size = 0

        # Estimate the number of files and amount of data to process.
        if @options[:progress]
                @roots.each do |root|
                        Find.find(root) do |path|
                                # Some special files fail here, and this was the simplest fix.
                                Find.prune unless File.exist?(path)
                                
                                # NOTE(review): this check is always true here, since we are already
                                # inside the `if @options[:progress]` above.
                                if @options[:progress]
                                        $stderr.puts "# Scanning: #{path}"
                                end
                                
                                # NOTE(review): path must respond to directory?/symlink?/file? —
                                # confirm which Find implementation yields such objects.
                                if excluded?(path)
                                        # Do not descend into excluded directories.
                                        Find.prune if path.directory?
                                elsif path.symlink?
                                        # Links count as an entry but contribute no bytes.
                                        total_count += 1
                                elsif path.file?
                                        total_count += 1
                                        total_size += File.size(path)
                                end
                        end
                end
        end
        
        if @options[:progress]
                # read_size is the number of bytes read so far from the file currently
                # being processed; it is added to the bytes of already finished files.
                # The lambda closes over the counters above, so it always sees current values.
                @progress = lambda do |read_size|
                        $stderr.puts "# Progress: File #{processed_count} / #{total_count}; Byte #{processed_size + read_size} / #{total_size} = #{sprintf('%0.3f%%', (processed_size + read_size).to_f / total_size.to_f * 100.0)} (#{read_size}, #{processed_size}, #{total_size})"
                end
        end
        
        # Main pass: emit the records.
        @roots.each do |root|
                recordset << header_for(root)
                
                Find.find(root) do |path|
                        # Some special files fail here, and this was the simplest fix.
                        Find.prune unless File.exist?(path)
                        
                        if @options[:progress]
                                $stderr.puts "# Path: #{path.relative_path}"
                        end
                        
                        if excluded?(path)
                                excluded_count += 1
                                
                                if @options[:verbose]
                                        recordset << excluded_record_for(path)
                                end
                                
                                # Skip everything below an excluded directory.
                                Find.prune if path.directory?
                        elsif path.directory?
                                directory_count += 1
                                
                                recordset << directory_record_for(path)
                        elsif path.symlink?
                                recordset << link_record_for(path)
                                
                                processed_count += 1
                        elsif path.file?
                                recordset << file_record_for(path)

                                processed_count += 1
                                processed_size += File.size(path)
                        else
                                # Anything that is not a directory, symlink or regular file is
                                # counted as excluded.
                                excluded_count += 1
                                
                                if @options[:verbose]
                                        recordset << excluded_record_for(path)
                                end
                        end
                        
                        # Print out a progress summary if requested
                        @progress.call(0) if @progress
                end
        end
        
        summary_message = "#{processed_count} files processed."

        # Output summary
        recordset << Record.new(:summary, summary_message, {
                'summary.directories' => directory_count,
                'summary.files' => processed_count,
                'summary.size' => processed_size,
                'summary.excluded' => excluded_count,
                'summary.time.end' => Time.now
        })
        
        return recordset
end
scan_path(path) click to toggle source
# File lib/fingerprint/scanner.rb, line 215
# Look up a single relative +path+ under the configured roots.
#
# Roots are tried in order and the first record produced by record_for is
# returned. Previously the loop returned after the first root no matter what,
# so a path that only exists under a later root was never found.
#
# Returns nil when the path is excluded, when there are no roots, or when no
# root yields a record.
def scan_path(path)
        return nil if excluded?(path)

        @roots.each do |root|
                record = record_for(Build::Files::Path.join(root, path))

                return record if record
        end

        return nil
end

Protected Instance Methods

blockdev_record_for(path) click to toggle source
# File lib/fingerprint/scanner.rb, line 157
# Build the :blockdev record for +path+, using the metadata gathered by
# metadata_for and the path's relative_path as the record path.
def blockdev_record_for(path)
        kind = :blockdev
        details = metadata_for(kind, path)

        return Record.new(kind, path.relative_path, details)
end
chardev_record_for(path) click to toggle source
# File lib/fingerprint/scanner.rb, line 163
# Build the :chardev record for +path+, using the metadata gathered by
# metadata_for and the path's relative_path as the record path.
def chardev_record_for(path)
        kind = :chardev
        details = metadata_for(kind, path)

        return Record.new(kind, path.relative_path, details)
end
digests_for(path) click to toggle source

This code is not thread-safe: the shared digest objects are reset and reused on every call.

# File lib/fingerprint/scanner.rb, line 84
# Compute every configured digest of the file at +path+.
#
# The file is read in binary mode in 10 MiB chunks and each chunk is fed to
# every digest in @digests. While reading, @progress (if set) is called with
# the number of bytes read from this file so far.
#
# Returns a Hash mapping "key.<name>" to the hex digest, one entry per digest.
#
# The digest objects are shared and reset at the start of each call, so this
# must not be called concurrently on the same scanner.
def digests_for(path)
        total = 0

        @digests.each_value(&:reset)

        File.open(path, "rb") do |file|
                # IO#read refills this buffer in place, so it has to be mutable:
                # String.new rather than a "" literal, which may be frozen.
                buffer = String.new

                while file.read(1024 * 1024 * 10, buffer)
                        total += buffer.bytesize

                        @progress.call(total) if @progress

                        @digests.each_value { |digest| digest << buffer }
                end
        end

        return @digests.each_with_object({}) do |(key, digest), metadata|
                metadata["key.#{key}"] = digest.hexdigest
        end
end
directory_record_for(path) click to toggle source

Build the record for a directory.

# File lib/fingerprint/scanner.rb, line 147
# Build the :directory record for +path+: its relative_path plus the metadata
# gathered by metadata_for.
def directory_record_for(path)
        location = path.relative_path
        details = metadata_for(:directory, path)

        return Record.new(:directory, location, details)
end
excluded_record_for(path) click to toggle source

Add information about excluded paths.

# File lib/fingerprint/scanner.rb, line 180
# Build the :excluded record for +path+. Only the relative path is recorded;
# no metadata is gathered for excluded entries.
def excluded_record_for(path)
        location = path.relative_path

        return Record.new(:excluded, location)
end
file_record_for(path) click to toggle source

Build the record for a file, including its associated metadata.

# File lib/fingerprint/scanner.rb, line 170
# Build the :file record for +path+.
#
# Everything in the record's metadata comes from metadata_for(:file, path);
# nothing else (such as digests) is merged in here.
def file_record_for(path)
        kind = :file
        details = metadata_for(kind, path)

        return Record.new(kind, path.relative_path, details)
end
header_for(root) click to toggle source

Adds a header for a given path which is mainly version information.

# File lib/fingerprint/scanner.rb, line 74
# Build the :configuration record that introduces the entries of +root+.
#
# The record path is the expanded root. Its metadata holds whether the
# :extended option is exactly true, the checksum names joined with ", ",
# the current time as the scan start, and Fingerprint::VERSION.
def header_for(root)
        location = File.expand_path(root)

        details = {}
        details['options.extended'] = (@options[:extended] == true)
        details['options.checksums'] = @options[:checksums].join(', ')
        details['summary.time.start'] = Time.now
        details['fingerprint.version'] = Fingerprint::VERSION

        return Record.new(:configuration, location, details)
end
metadata_for(type, path) click to toggle source
# File lib/fingerprint/scanner.rb, line 113
# Gather the metadata Hash for the entry at +path+.
#
# type - the kind of entry being described:
#        :link                - only 'file.symlink' (the link target) is recorded;
#        :file                - 'file.size' plus the digests from digests_for;
#        :blockdev / :chardev - the device major/minor numbers;
#        anything else        - no type-specific keys.
#
# When @options[:extended] is set, every non-link entry also gets its
# modification time, octal mode, and owning user/group ids. The user/group
# names are added only when the id can be resolved: Etc raises ArgumentError
# for ids without an account entry, and such files must not abort the scan.
#
# Returns the metadata Hash. Errors from File.readlink / File.stat propagate.
def metadata_for(type, path)
        metadata = {}

        if type == :link
                metadata['file.symlink'] = File.readlink(path)

                return metadata
        end

        stat = File.stat(path)

        if type == :file
                metadata['file.size'] = stat.size
                metadata.merge!(digests_for(path))
        elsif type == :blockdev or type == :chardev
                metadata['file.dev_major'] = stat.dev_major
                metadata['file.dev_minor'] = stat.dev_minor
        end

        # Extended information
        if @options[:extended]
                # Taken from the stat we already have, rather than a second lookup.
                metadata['posix.time.modified'] = stat.mtime

                metadata['posix.mode'] = stat.mode.to_s(8)

                metadata['posix.permissions.user.id'] = stat.uid
                user = begin
                        Etc.getpwuid(stat.uid)
                rescue ArgumentError
                        # No passwd entry for this uid.
                        nil
                end
                metadata['posix.permissions.user.name'] = user.name if user

                metadata['posix.permissions.group.id'] = stat.gid
                group = begin
                        Etc.getgrgid(stat.gid)
                rescue ArgumentError
                        # No group entry for this gid.
                        nil
                end
                metadata['posix.permissions.group.name'] = group.name if group
        end

        return metadata
end
record_for(path) click to toggle source
# File lib/fingerprint/scanner.rb, line 184
# Build the record matching the type of the entry at +path+.
#
# Symlinks, block devices, character devices, sockets and regular files are
# dispatched to their *_record_for builder. Any other type (for example a
# directory) yields nil, as does a path that does not exist.
def record_for(path)
        # lstat rather than stat: stat follows symbolic links, so symlink? on its
        # result is always false and dangling links would look like missing files.
        stat = File.lstat(path)

        if stat.symlink?
                return link_record_for(path)
        elsif stat.blockdev?
                return blockdev_record_for(path)
        elsif stat.chardev?
                return chardev_record_for(path)
        elsif stat.socket?
                return socket_record_for(path)
        elsif stat.file?
                return file_record_for(path)
        end

        # No record builder for this type of entry.
        return nil
rescue Errno::ENOENT
        return nil
end